From 23eec0e1ee5a81341c0105fac47e717c63772a25 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:21:23 -0500 Subject: [PATCH 01/13] refactor: rename ASK_USER marker to ASK_GUARDIAN (#7507) Co-authored-by: Claude --- ARCHITECTURE.md | 10 +++++----- assistant/src/__tests__/call-bridge.test.ts | 8 ++++---- .../src/__tests__/call-orchestrator.test.ts | 20 +++++++++---------- assistant/src/calls/call-orchestrator.ts | 20 +++++++++---------- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index a3cc9a92443..2015f2d947f 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -3905,7 +3905,7 @@ sequenceDiagram Orch->>CallStore: recordCallEvent() end - alt ASK_USER pattern detected + alt ASK_GUARDIAN pattern detected Orch->>CallStore: createPendingQuestion() Orch->>State: fireCallQuestionNotifier() State->>Session: question callback @@ -3941,7 +3941,7 @@ sequenceDiagram | `assistant/src/calls/twilio-routes.ts` | HTTP webhook handlers: voice webhook (returns TwiML with WS-A/WS-B guardrails), status callback, connect action | | `assistant/src/calls/relay-server.ts` | WebSocket handler for the Twilio ConversationRelay protocol; manages RelayConnection instances per call | | `assistant/src/calls/speaker-identification.ts` | Reusable speaker recognition primitive for voice prompts: extracts provider speaker metadata (top-level and nested fields), resolves stable per-call speaker identities, and emits speaker context for personalization | -| `assistant/src/calls/call-orchestrator.ts` | LLM-driven conversation manager: receives caller utterances, streams responses via Anthropic Claude, detects ASK_USER and END_CALL control markers | +| `assistant/src/calls/call-orchestrator.ts` | LLM-driven conversation manager: receives caller utterances, streams responses via Anthropic Claude, detects ASK_GUARDIAN and END_CALL control markers | | `assistant/src/calls/call-state.ts` | Notifier pattern (Maps with 
register/unregister/fire helpers) for cross-component communication: question notifiers, completion notifiers, and orchestrator registry | | `assistant/src/calls/call-constants.ts` | Config-backed constants: max call duration, user consultation timeout, silence timeout, denied emergency numbers | | `assistant/src/calls/voice-provider.ts` | Abstract VoiceProvider interface for provider-agnostic call initiation | @@ -3989,7 +3989,7 @@ The bridge function `tryRouteCallMessage()` applies the following decision logic #### Answer path detail -1. **Question emission**: When the LLM emits `[ASK_USER: question]`, the orchestrator creates a pending question in SQLite, fires the question notifier, and transitions to `waiting_on_user` state. +1. **Question emission**: When the LLM emits `[ASK_GUARDIAN: question]`, the orchestrator creates a pending question in SQLite, fires the question notifier, and transitions to `waiting_on_user` state. 2. **In-thread display**: The Session's registered question notifier callback persists an assistant message in the conversation thread (via `conversationStore.addMessage()`) and emits `assistant_text_delta` + `message_complete` events to connected clients. 3. **Auto-consumption**: `tryRouteCallMessage()` is checked before the agent loop in two entrypoints: - **HTTP path**: `DaemonServer.processMessage()` / `persistAndProcessMessage()` in the daemon server. @@ -4026,7 +4026,7 @@ All three tables live in `~/.vellum/workspace/data/db/assistant.db` alongside ex - **`call_events`** — Append-only event log for each call session. Event types include `call_started`, `call_connected`, `caller_spoke`, `assistant_spoke`, `user_question_asked`, `user_answered`, `call_ended`, `call_failed`. For voice prompts, `caller_spoke` payloads include speaker context (`speakerId`, `speakerLabel`, `speakerConfidence`, `speakerSource`) when available. Foreign key to `call_sessions(id)` with cascade delete. 
Includes a unique index on `(call_session_id, dedupe_key)` for callback idempotency. -- **`call_pending_questions`** — Tracks questions the AI asks the user during a call (via the `[ASK_USER: ...]` pattern). Status lifecycle: `pending` -> `answered`/`expired`/`cancelled`. Foreign key to `call_sessions(id)` with cascade delete. +- **`call_pending_questions`** — Tracks questions the AI asks the user during a call (via the `[ASK_GUARDIAN: ...]` pattern). Status lifecycle: `pending` -> `answered`/`expired`/`cancelled`. Foreign key to `call_sessions(id)` with cascade delete. ### Gateway Twilio Webhook Ingress @@ -4086,7 +4086,7 @@ Both tools and HTTP routes delegate to the same domain functions in `call-domain The CallOrchestrator detects two special markers in the LLM's response text: -- **`[ASK_USER: question]`** — The AI needs to consult the user. The orchestrator creates a pending question, notifies the session via `fireCallQuestionNotifier`, puts the caller on hold, and waits for a user answer (timeout configured via `calls.userConsultTimeoutSeconds`). +- **`[ASK_GUARDIAN: question]`** — The AI needs to consult the guardian. The orchestrator creates a pending question, notifies the session via `fireCallQuestionNotifier`, puts the caller on hold, and waits for a guardian answer (timeout configured via `calls.userConsultTimeoutSeconds`). - **`[END_CALL]`** — The AI has determined the call's purpose is fulfilled. The orchestrator sends a goodbye, closes the ConversationRelay session, and marks the call as completed. Both markers are stripped from the TTS output so the callee never hears the raw control text. 
diff --git a/assistant/src/__tests__/call-bridge.test.ts b/assistant/src/__tests__/call-bridge.test.ts index e91f0027fd2..204cf92ba77 100644 --- a/assistant/src/__tests__/call-bridge.test.ts +++ b/assistant/src/__tests__/call-bridge.test.ts @@ -261,9 +261,9 @@ describe('call-bridge', () => { }); test('routes answer to orchestrator when waiting and returns handled:true', async () => { - // Setup: trigger ASK_USER to put orchestrator in waiting_on_user state + // Setup: trigger ASK_GUARDIAN to put orchestrator in waiting_on_user state mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. [ASK_USER: Preferred date?]']), + createMockStream(['Hold on. [ASK_GUARDIAN: Preferred date?]']), ); ensureConversation('conv-bridge'); @@ -327,9 +327,9 @@ describe('call-bridge', () => { }); test('prefers answer path over instruction path when pending question exists', async () => { - // Setup: trigger ASK_USER to put orchestrator in waiting_on_user state + // Setup: trigger ASK_GUARDIAN to put orchestrator in waiting_on_user state mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. [ASK_USER: Budget range?]']), + createMockStream(['Hold on. 
[ASK_GUARDIAN: Budget range?]']), ); ensureConversation('conv-prefer-answer'); diff --git a/assistant/src/__tests__/call-orchestrator.test.ts b/assistant/src/__tests__/call-orchestrator.test.ts index cf86e2eb53f..18f6a12c9a6 100644 --- a/assistant/src/__tests__/call-orchestrator.test.ts +++ b/assistant/src/__tests__/call-orchestrator.test.ts @@ -287,11 +287,11 @@ describe('call-orchestrator', () => { orchestrator.destroy(); }); - // ── ASK_USER pattern ────────────────────────────────────────────── + // ── ASK_GUARDIAN pattern ────────────────────────────────────────── - test('ASK_USER pattern: detects pattern, creates pending question, enters waiting_on_user', async () => { + test('ASK_GUARDIAN pattern: detects pattern, creates pending question, enters waiting_on_user', async () => { mockStreamFn.mockImplementation(() => - createMockStream(['Let me check on that. ', '[ASK_USER: What date works best?]']), + createMockStream(['Let me check on that. ', '[ASK_GUARDIAN: What date works best?]']), ); const { session, relay, orchestrator } = setupOrchestrator('Book appointment'); @@ -307,9 +307,9 @@ describe('call-orchestrator', () => { const updatedSession = getCallSession(session.id); expect(updatedSession!.status).toBe('waiting_on_user'); - // The ASK_USER marker text should NOT appear in the relay tokens + // The ASK_GUARDIAN marker text should NOT appear in the relay tokens const allText = relay.sentTokens.map((t) => t.token).join(''); - expect(allText).not.toContain('[ASK_USER:'); + expect(allText).not.toContain('[ASK_GUARDIAN:'); orchestrator.destroy(); }); @@ -366,9 +366,9 @@ describe('call-orchestrator', () => { // ── handleUserAnswer ────────────────────────────────────────────── test('handleUserAnswer: returns true immediately and fires LLM asynchronously', async () => { - // First utterance triggers ASK_USER + // First utterance triggers ASK_GUARDIAN mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. 
[ASK_USER: Preferred time?]']), + createMockStream(['Hold on. [ASK_GUARDIAN: Preferred time?]']), ); const { relay, orchestrator } = setupOrchestrator(); @@ -402,7 +402,7 @@ describe('call-orchestrator', () => { test('mid-call question flow: unavailable time → ask user → user confirms → resumed call', async () => { // Step 1: Caller says "7:30" but it's unavailable. The LLM asks the user. mockStreamFn.mockImplementation(() => - createMockStream(['I\'m sorry, 7:30 is not available. ', '[ASK_USER: Is 8:00 okay instead?]']), + createMockStream(['I\'m sorry, 7:30 is not available. ', '[ASK_GUARDIAN: Is 8:00 okay instead?]']), ); const { session, relay, orchestrator } = setupOrchestrator('Schedule a haircut'); @@ -892,9 +892,9 @@ describe('call-orchestrator', () => { }); test('handleUserInstruction: does not trigger LLM when orchestrator is not idle', async () => { - // First, trigger ASK_USER so orchestrator enters waiting_on_user + // First, trigger ASK_GUARDIAN so orchestrator enters waiting_on_user mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. [ASK_USER: What time?]']), + createMockStream(['Hold on. 
[ASK_GUARDIAN: What time?]']), ); const { session, orchestrator } = setupOrchestrator(); diff --git a/assistant/src/calls/call-orchestrator.ts b/assistant/src/calls/call-orchestrator.ts index b1410570807..b12a39ce780 100644 --- a/assistant/src/calls/call-orchestrator.ts +++ b/assistant/src/calls/call-orchestrator.ts @@ -26,8 +26,8 @@ const log = getLogger('call-orchestrator'); type OrchestratorState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking'; -const ASK_USER_CAPTURE_REGEX = /\[ASK_USER:\s*(.+?)\]/; -const ASK_USER_MARKER_REGEX = /\[ASK_USER:\s*.+?\]/g; +const ASK_GUARDIAN_CAPTURE_REGEX = /\[ASK_GUARDIAN:\s*(.+?)\]/; +const ASK_GUARDIAN_MARKER_REGEX = /\[ASK_GUARDIAN:\s*.+?\]/g; const USER_ANSWERED_MARKER_REGEX = /\[USER_ANSWERED:\s*.+?\]/g; const USER_INSTRUCTION_MARKER_REGEX = /\[USER_INSTRUCTION:\s*.+?\]/g; const CALL_OPENING_MARKER_REGEX = /\[CALL_OPENING\]/g; @@ -37,7 +37,7 @@ const END_CALL_MARKER = '[END_CALL]'; function stripInternalSpeechMarkers(text: string): string { return text - .replace(ASK_USER_MARKER_REGEX, '') + .replace(ASK_GUARDIAN_MARKER_REGEX, '') .replace(USER_ANSWERED_MARKER_REGEX, '') .replace(USER_INSTRUCTION_MARKER_REGEX, '') .replace(CALL_OPENING_MARKER_REGEX, '') @@ -256,7 +256,7 @@ export class CallOrchestrator { '0. When introducing yourself, refer to yourself as an assistant. Avoid the phrase "AI assistant" unless directly asked.', disclosureRule, '2. Be concise — phone conversations should be brief and natural.', - '3. If the callee asks something you don\'t know, include [ASK_USER: your question here] in your response along with a hold message like "Let me check on that for you."', + '3. If the callee asks something you don\'t know, include [ASK_GUARDIAN: your question here] in your response along with a hold message like "Let me check on that for you."', '4. If the callee provides information preceded by [USER_ANSWERED: ...], use that answer naturally in the conversation.', '5. 
If you see [USER_INSTRUCTION: ...], treat it as a high-priority steering directive from your user. Follow the instruction immediately, adjusting your approach or response accordingly.', '6. When the call\'s purpose is fulfilled, include [END_CALL] in your response along with a polite goodbye.', @@ -312,7 +312,7 @@ export class CallOrchestrator { { signal: runSignal }, ); - // Buffer incoming tokens so we can strip control markers ([ASK_USER:...], [END_CALL]) + // Buffer incoming tokens so we can strip control markers ([ASK_GUARDIAN:...], [END_CALL]) // before they reach TTS. We hold text whenever an unmatched '[' appears, since it // could be the start of a control marker. let ttsBuffer = ''; @@ -339,17 +339,17 @@ export class CallOrchestrator { // The check must be bidirectional: // - When the buffer is shorter than the prefix (e.g. "[ASK"), the // buffer is a prefix of the control tag → hold it. - // - When the buffer is longer than the prefix (e.g. "[ASK_USER: what"), + // - When the buffer is longer than the prefix (e.g. "[ASK_GUARDIAN: what"), // the buffer starts with the control tag prefix → hold it (the // variable-length payload hasn't been closed yet). 
const afterBracket = ttsBuffer; const couldBeControl = - '[ASK_USER:'.startsWith(afterBracket) || + '[ASK_GUARDIAN:'.startsWith(afterBracket) || '[USER_ANSWERED:'.startsWith(afterBracket) || '[USER_INSTRUCTION:'.startsWith(afterBracket) || '[CALL_OPENING]'.startsWith(afterBracket) || '[END_CALL]'.startsWith(afterBracket) || - afterBracket.startsWith('[ASK_USER:') || + afterBracket.startsWith('[ASK_GUARDIAN:') || afterBracket.startsWith('[USER_ANSWERED:') || afterBracket.startsWith('[USER_INSTRUCTION:') || afterBracket === '[CALL_OPENING' || @@ -412,8 +412,8 @@ export class CallOrchestrator { } } - // Check for ASK_USER pattern - const askMatch = responseText.match(ASK_USER_CAPTURE_REGEX); + // Check for ASK_GUARDIAN pattern + const askMatch = responseText.match(ASK_GUARDIAN_CAPTURE_REGEX); if (askMatch) { const questionText = askMatch[1]; createPendingQuestion(this.callSessionId, questionText); From d12f697f676a8c84c147eda03b45c48de5491f5f Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:25:53 -0500 Subject: [PATCH 02/13] feat: add voice channel identity and per-call voice conversations (#7512) Co-authored-by: Claude --- assistant/src/calls/call-domain.ts | 26 ++++++++++- assistant/src/calls/call-store.ts | 5 ++- assistant/src/calls/types.ts | 1 + assistant/src/memory/db-init.ts | 4 ++ assistant/src/memory/schema-migration.ts | 18 ++++++++ assistant/src/memory/schema.ts | 1 + .../src/runtime/channel-readiness-service.ts | 44 ++++++++++++++++++- .../src/runtime/channel-readiness-types.ts | 2 +- 8 files changed, 97 insertions(+), 4 deletions(-) diff --git a/assistant/src/calls/call-domain.ts b/assistant/src/calls/call-domain.ts index 11f62f4fb63..a98ccec7509 100644 --- a/assistant/src/calls/call-domain.ts +++ b/assistant/src/calls/call-domain.ts @@ -26,6 +26,8 @@ import { getSecureKey } from '../security/secure-keys.js'; import type { CallSession } from './types.js'; import { VALID_CALLER_IDENTITY_MODES } from '../config/schema.js'; import type { 
AssistantConfig } from '../config/types.js'; +import { getOrCreateConversation } from '../memory/conversation-key-store.js'; +import { upsertBinding } from '../memory/external-conversation-store.js'; const log = getLogger('call-domain'); @@ -222,10 +224,32 @@ export async function startCall(input: StartCallInput): Promise>, + updates: Partial>, ): void { const db = getDb(); diff --git a/assistant/src/calls/types.ts b/assistant/src/calls/types.ts index 755e1c0e16d..2dd381ae503 100644 --- a/assistant/src/calls/types.ts +++ b/assistant/src/calls/types.ts @@ -14,6 +14,7 @@ export interface CallSession { callerIdentityMode: string | null; callerIdentitySource: string | null; assistantId: string | null; + initiatedFromConversationId?: string | null; startedAt: number | null; endedAt: number | null; lastError: string | null; diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts index ce638c35b7f..47560eb14bc 100644 --- a/assistant/src/memory/db-init.ts +++ b/assistant/src/memory/db-init.ts @@ -12,6 +12,7 @@ import { migrateLlmUsageEventsDropAssistantId, migrateExtConvBindingsChannelChatUnique, migrateCallSessionsProviderSidDedup, + migrateCallSessionsAddInitiatedFrom, migrateMemoryFtsBackfill, } from './schema-migration.js'; @@ -785,6 +786,9 @@ export function initializeDb(): void { // Persist assistantId so the webhook path can resolve assistant-scoped Twilio numbers try { database.run(/*sql*/ `ALTER TABLE call_sessions ADD COLUMN assistant_id TEXT`); } catch { /* already exists */ } + // Track which conversation initiated the call (the chat where call_start was invoked) + migrateCallSessionsAddInitiatedFrom(database); + // Unique constraint: at most one non-null provider_call_sid per (provider, provider_call_sid). // On upgraded databases that pre-date this constraint, duplicate rows may exist; deduplicate // them first to avoid a UNIQUE constraint failure that would prevent startup. 
diff --git a/assistant/src/memory/schema-migration.ts b/assistant/src/memory/schema-migration.ts index b92550718e7..f8322fcc3e9 100644 --- a/assistant/src/memory/schema-migration.ts +++ b/assistant/src/memory/schema-migration.ts @@ -982,3 +982,21 @@ export function migrateCallSessionsProviderSidDedup(database: Db): void { throw e; } } + +/** + * Add the `initiated_from_conversation_id` column to `call_sessions` so + * voice calls can track which conversation triggered them while pointing + * the session's `conversation_id` to a dedicated per-call voice conversation. + * + * Uses ALTER TABLE ... ADD COLUMN which is a no-op if the column already + * exists (caught via try/catch, matching the existing migration pattern in + * db-init.ts for similar additive columns). + */ +export function migrateCallSessionsAddInitiatedFrom(database: Db): void { + const raw = (database as unknown as { $client: Database }).$client; + try { + raw.exec(/*sql*/ `ALTER TABLE call_sessions ADD COLUMN initiated_from_conversation_id TEXT`); + } catch { + // Column already exists — nothing to do. 
+ } +} diff --git a/assistant/src/memory/schema.ts b/assistant/src/memory/schema.ts index ddc3000fe15..d4cc135de31 100644 --- a/assistant/src/memory/schema.ts +++ b/assistant/src/memory/schema.ts @@ -552,6 +552,7 @@ export const callSessions = sqliteTable('call_sessions', { callerIdentityMode: text('caller_identity_mode'), callerIdentitySource: text('caller_identity_source'), assistantId: text('assistant_id'), + initiatedFromConversationId: text('initiated_from_conversation_id'), startedAt: integer('started_at'), endedAt: integer('ended_at'), lastError: text('last_error'), diff --git a/assistant/src/runtime/channel-readiness-service.ts b/assistant/src/runtime/channel-readiness-service.ts index 7d853524305..f627a0c66af 100644 --- a/assistant/src/runtime/channel-readiness-service.ts +++ b/assistant/src/runtime/channel-readiness-service.ts @@ -165,6 +165,47 @@ const smsProbe: ChannelProbe = { }, }; +// ── Voice Probe ───────────────────────────────────────────────────────────── + +const voiceProbe: ChannelProbe = { + channel: 'voice', + runLocalChecks(context?: ChannelProbeContext): ReadinessCheckResult[] { + const results: ReadinessCheckResult[] = []; + + const hasCreds = hasTwilioCredentials(); + results.push({ + name: 'twilio_credentials', + passed: hasCreds, + message: hasCreds + ? 'Twilio credentials are configured' + : 'Twilio Account SID and Auth Token are not configured', + }); + + const phoneNumber = process.env.TWILIO_PHONE_NUMBER + || getSecureKey('credential:twilio:phone_number') + || ''; + const hasPhone = !!phoneNumber; + results.push({ + name: 'phone_number', + passed: hasPhone, + message: hasPhone + ? 'Phone number is assigned for voice calls' + : 'No phone number assigned for voice calls', + }); + + const hasIngress = hasIngressConfigured(); + results.push({ + name: 'ingress', + passed: hasIngress, + message: hasIngress + ? 
'Public ingress URL is configured' + : 'Public ingress URL is not configured or disabled', + }); + + return results; + }, +}; + // ── Telegram Probe ────────────────────────────────────────────────────────── const telegramProbe: ChannelProbe = { @@ -340,10 +381,11 @@ export class ChannelReadinessService { // ── Factory ───────────────────────────────────────────────────────────────── -/** Create a service instance with built-in SMS and Telegram probes registered. */ +/** Create a service instance with built-in SMS, Voice, and Telegram probes registered. */ export function createReadinessService(): ChannelReadinessService { const service = new ChannelReadinessService(); service.registerProbe(smsProbe); + service.registerProbe(voiceProbe); service.registerProbe(telegramProbe); return service; } diff --git a/assistant/src/runtime/channel-readiness-types.ts b/assistant/src/runtime/channel-readiness-types.ts index 19d933a340a..4cfc1ca4d8b 100644 --- a/assistant/src/runtime/channel-readiness-types.ts +++ b/assistant/src/runtime/channel-readiness-types.ts @@ -1,7 +1,7 @@ // Channel readiness types — reusable primitive for all channels. /** Logical channel identifier. Well-known channels have literal types; custom channels use string. */ -export type ChannelId = 'sms' | 'telegram' | string; +export type ChannelId = 'sms' | 'telegram' | 'voice' | string; /** Result of a single readiness check (local or remote). 
*/ export interface ReadinessCheckResult { From b9d6276592b3f7134739212f1de57e8a94ed1725 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:31:16 -0500 Subject: [PATCH 03/13] =?UTF-8?q?fix:=20address=20M2=20review=20feedback?= =?UTF-8?q?=20=E2=80=94=20call=20session=20lookup=20+=20voice=20probe=20(#?= =?UTF-8?q?7524)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Claude --- assistant/src/calls/call-store.ts | 7 +++- .../src/runtime/channel-readiness-service.ts | 38 ++++++++++++++++--- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/assistant/src/calls/call-store.ts b/assistant/src/calls/call-store.ts index 126d7ecfc4d..cfdbe414948 100644 --- a/assistant/src/calls/call-store.ts +++ b/assistant/src/calls/call-store.ts @@ -1,4 +1,4 @@ -import { eq, and, notInArray, desc } from 'drizzle-orm'; +import { eq, and, or, notInArray, desc } from 'drizzle-orm'; import { v4 as uuid } from 'uuid'; import { getDb } from '../memory/db.js'; import { callSessions, callEvents, callPendingQuestions } from '../memory/schema.js'; @@ -117,7 +117,10 @@ export function getActiveCallSessionForConversation(conversationId: string): Cal .from(callSessions) .where( and( - eq(callSessions.conversationId, conversationId), + or( + eq(callSessions.conversationId, conversationId), + eq(callSessions.initiatedFromConversationId, conversationId), + ), notInArray(callSessions.status, ['completed', 'failed', 'cancelled']), ), ) diff --git a/assistant/src/runtime/channel-readiness-service.ts b/assistant/src/runtime/channel-readiness-service.ts index f627a0c66af..cffcea2eed8 100644 --- a/assistant/src/runtime/channel-readiness-service.ts +++ b/assistant/src/runtime/channel-readiness-service.ts @@ -167,6 +167,30 @@ const smsProbe: ChannelProbe = { // ── Voice Probe ───────────────────────────────────────────────────────────── +/** + * Resolve voice from-number with the same precedence as SMS: + * assistant mapping 
-> env override -> config sms.phoneNumber -> secure key fallback. + * + * Voice and SMS share the same Twilio phone number infrastructure, so the + * resolution logic is identical to resolveSmsPhoneNumber. + */ +function resolveVoicePhoneNumber(assistantId?: string): string { + try { + const raw = loadRawConfig(); + const smsConfig = (raw?.sms ?? {}) as Record<string, unknown>; + const mapped = getAssistantMappedPhoneNumber(smsConfig, assistantId); + return mapped + || process.env.TWILIO_PHONE_NUMBER + || (smsConfig.phoneNumber as string) + || getSecureKey('credential:twilio:phone_number') + || ''; + } catch { + return process.env.TWILIO_PHONE_NUMBER + || getSecureKey('credential:twilio:phone_number') + || ''; + } +} + const voiceProbe: ChannelProbe = { channel: 'voice', runLocalChecks(context?: ChannelProbeContext): ReadinessCheckResult[] { @@ -181,16 +205,18 @@ const voiceProbe: ChannelProbe = { : 'Twilio Account SID and Auth Token are not configured', }); - const phoneNumber = process.env.TWILIO_PHONE_NUMBER - || getSecureKey('credential:twilio:phone_number') - || ''; - const hasPhone = !!phoneNumber; + const resolvedNumber = resolveVoicePhoneNumber(context?.assistantId); + const hasPhone = !!resolvedNumber || (!context?.assistantId && hasAnyAssistantMappedPhoneNumberSafe()); results.push({ name: 'phone_number', passed: hasPhone, message: hasPhone - ? 'Phone number is assigned for voice calls' - : 'No phone number assigned for voice calls', + ? (context?.assistantId && !resolvedNumber + ? `Assistant ${context.assistantId} has no direct mapping, but phone numbers are assigned` + : 'Phone number is assigned for voice calls') + : (context?.assistantId + ?
`No phone number assigned for assistant ${context.assistantId}` + : 'No phone number assigned for voice calls'), }); const hasIngress = hasIngressConfigured(); From bbf649732e5ef688ae3e3c9e3a1e1b7fa8bf9d08 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:39:12 -0500 Subject: [PATCH 04/13] feat: voice event projection, pointer messages, and bridge removal (#7529) Co-authored-by: Claude --- assistant/src/__tests__/call-bridge.test.ts | 517 ------------------ .../__tests__/session-process-bridge.test.ts | 244 --------- assistant/src/calls/call-bridge.ts | 168 ------ assistant/src/calls/call-domain.ts | 7 + assistant/src/calls/call-orchestrator.ts | 20 +- assistant/src/calls/call-pointer-messages.ts | 50 ++ assistant/src/calls/relay-server.ts | 16 + .../bundled-skills/phone-calls/SKILL.md | 16 +- assistant/src/daemon/server.ts | 43 -- assistant/src/daemon/session-notifiers.ts | 2 +- assistant/src/daemon/session-process.ts | 61 +-- 11 files changed, 101 insertions(+), 1043 deletions(-) delete mode 100644 assistant/src/__tests__/call-bridge.test.ts delete mode 100644 assistant/src/__tests__/session-process-bridge.test.ts delete mode 100644 assistant/src/calls/call-bridge.ts create mode 100644 assistant/src/calls/call-pointer-messages.ts diff --git a/assistant/src/__tests__/call-bridge.test.ts b/assistant/src/__tests__/call-bridge.test.ts deleted file mode 100644 index 204cf92ba77..00000000000 --- a/assistant/src/__tests__/call-bridge.test.ts +++ /dev/null @@ -1,517 +0,0 @@ -import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test'; -import { mkdtempSync, rmSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { EventEmitter } from 'node:events'; - -const testDir = mkdtempSync(join(tmpdir(), 'call-bridge-test-')); - -// ── Platform + logger mocks (must come before any source imports) ──── - -mock.module('../util/platform.js', () => ({ - getDataDir: () => testDir, - isMacOS: () => 
process.platform === 'darwin', - isLinux: () => process.platform === 'linux', - isWindows: () => process.platform === 'win32', - getSocketPath: () => join(testDir, 'test.sock'), - getPidPath: () => join(testDir, 'test.pid'), - getDbPath: () => join(testDir, 'test.db'), - getLogPath: () => join(testDir, 'test.log'), - ensureDataDir: () => {}, -})); - -mock.module('../util/logger.js', () => ({ - getLogger: () => - new Proxy({} as Record, { - get: () => () => {}, - }), -})); - -// ── Config mock ───────────────────────────────────────────────────── - -mock.module('../config/loader.js', () => ({ - getConfig: () => ({ - apiKeys: { anthropic: 'test-key' }, - memory: { enabled: false }, - calls: { - enabled: true, - provider: 'twilio', - maxDurationSeconds: 3600, - userConsultTimeoutSeconds: 120, - disclosure: { enabled: false, text: '' }, - safety: { denyCategories: [] }, - }, - }), -})); - -// ── Anthropic SDK mock ────────────────────────────────────────────── - -function createMockStream(tokens: string[]) { - const emitter = new EventEmitter(); - const fullText = tokens.join(''); - - const stream = { - on: (event: string, handler: (...args: unknown[]) => void) => { - emitter.on(event, handler); - return stream; - }, - finalMessage: () => { - for (const token of tokens) { - emitter.emit('text', token); - } - return Promise.resolve({ - content: [{ type: 'text', text: fullText }], - }); - }, - }; - - return stream; -} - -const mockStreamFn = mock((..._args: unknown[]) => createMockStream(['Hello'])); - -mock.module('@anthropic-ai/sdk', () => ({ - default: class MockAnthropic { - messages = { - stream: (...args: unknown[]) => mockStreamFn(...args), - }; - }, -})); - -// ── Import source modules after all mocks ─────────────────────────── - -import { initializeDb, getDb, resetDb } from '../memory/db.js'; -import { conversations } from '../memory/schema.js'; -import { - createCallSession, - getPendingQuestion, - updateCallSession, - recordCallEvent, - createPendingQuestion, 
-} from '../calls/call-store.js'; -import { - registerCallQuestionNotifier, - unregisterCallQuestionNotifier, - registerCallTranscriptNotifier, - unregisterCallTranscriptNotifier, - fireCallTranscriptNotifier, - registerCallCompletionNotifier, - unregisterCallCompletionNotifier, - fireCallQuestionNotifier, - fireCallCompletionNotifier, -} from '../calls/call-state.js'; -import { CallOrchestrator } from '../calls/call-orchestrator.js'; -import { tryRouteCallMessage } from '../calls/call-bridge.js'; -import * as conversationStore from '../memory/conversation-store.js'; -import type { RelayConnection } from '../calls/relay-server.js'; - -initializeDb(); - -afterAll(() => { - resetDb(); - try { - rmSync(testDir, { recursive: true }); - } catch { - /* best effort */ - } -}); - -// ── Relay mock factory ────────────────────────────────────────────── - -interface MockRelay extends RelayConnection { - sentTokens: Array<{ token: string; last: boolean }>; - endCalled: boolean; -} - -function createMockRelay(): MockRelay { - const state = { - sentTokens: [] as Array<{ token: string; last: boolean }>, - _endCalled: false, - }; - - return { - get sentTokens() { return state.sentTokens; }, - get endCalled() { return state._endCalled; }, - sendTextToken(token: string, last: boolean) { - state.sentTokens.push({ token, last }); - }, - endSession(_reason?: string) { - state._endCalled = true; - }, - } as unknown as MockRelay; -} - -// ── Helpers ───────────────────────────────────────────────────────── - -let ensuredConvIds = new Set(); -function ensureConversation(id: string): void { - if (ensuredConvIds.has(id)) return; - const db = getDb(); - const now = Date.now(); - db.insert(conversations).values({ - id, - title: `Test conversation ${id}`, - createdAt: now, - updatedAt: now, - }).run(); - ensuredConvIds.add(id); -} - -function resetTables() { - const db = getDb(); - db.run('DELETE FROM call_pending_questions'); - db.run('DELETE FROM call_events'); - db.run('DELETE FROM 
call_sessions'); - db.run('DELETE FROM messages'); - db.run('DELETE FROM conversations'); - ensuredConvIds = new Set(); -} - -function getMessagesForConversation(conversationId: string) { - return conversationStore.getMessages(conversationId); -} - -describe('call-bridge', () => { - beforeEach(() => { - resetTables(); - mockStreamFn.mockImplementation(() => createMockStream(['Hello'])); - }); - - // ── tryRouteCallMessage — answer path ─────────────────────── - - test('returns handled:false when no active call exists', async () => { - ensureConversation('conv-no-call'); - const result = await tryRouteCallMessage('conv-no-call', 'some answer'); - expect(result.handled).toBe(false); - expect(result.reason).toBe('no_active_call'); - }); - - test('returns instruction_relay_failed (consumed) when call exists but no orchestrator and no pending question', async () => { - ensureConversation('conv-no-orch'); - createCallSession({ - conversationId: 'conv-no-orch', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - const result = await tryRouteCallMessage('conv-no-orch', 'some instruction'); - expect(result.handled).toBe(true); - expect(result.reason).toBe('instruction_relay_failed'); - expect(result.userFacingText).toBe('Failed to relay instruction to the active call.'); - }); - - test('returns handled:false when orchestrator is not found (call still active but no orchestrator)', async () => { - ensureConversation('conv-ended'); - const callSession = createCallSession({ - conversationId: 'conv-ended', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - // Leave the session in an active (non-terminal) state but do NOT register an orchestrator. - // This simulates a race where the orchestrator was destroyed but the session hasn't - // been marked terminal yet. 
- updateCallSession(callSession.id, { status: 'in_progress' }); - - // Create a pending question without an orchestrator - createPendingQuestion(callSession.id, 'What time?'); - - const result = await tryRouteCallMessage('conv-ended', 'Too late'); - expect(result.handled).toBe(false); - expect(result.reason).toBe('orchestrator_not_found'); - }); - - test('returns no_active_call when call has already completed', async () => { - ensureConversation('conv-completed'); - const callSession = createCallSession({ - conversationId: 'conv-completed', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - // Mark the call as completed — getActiveCallSessionForConversation will return null - updateCallSession(callSession.id, { status: 'completed', endedAt: Date.now() }); - - const result = await tryRouteCallMessage('conv-completed', 'Too late'); - expect(result.handled).toBe(false); - expect(result.reason).toBe('no_active_call'); - }); - - test('returns handled:false when orchestrator is not in waiting_on_user state', async () => { - ensureConversation('conv-not-waiting'); - const callSession = createCallSession({ - conversationId: 'conv-not-waiting', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - - // Create orchestrator (state=idle by default) - const relay = createMockRelay(); - const orchestrator = new CallOrchestrator(callSession.id, relay as unknown as RelayConnection, null); - - // Create a pending question in the DB but orchestrator is idle, not waiting_on_user - createPendingQuestion(callSession.id, 'What time?'); - - const result = await tryRouteCallMessage('conv-not-waiting', 'answer'); - expect(result.handled).toBe(false); - expect(result.reason).toBe('orchestrator_not_waiting'); - - orchestrator.destroy(); - }); - - test('routes answer to orchestrator when waiting and returns handled:true', async () => { - // Setup: trigger ASK_GUARDIAN to put orchestrator in waiting_on_user state - 
mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. [ASK_GUARDIAN: Preferred date?]']), - ); - - ensureConversation('conv-bridge'); - const callSession = createCallSession({ - conversationId: 'conv-bridge', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - - const relay = createMockRelay(); - const orchestrator = new CallOrchestrator(callSession.id, relay as unknown as RelayConnection, 'test task'); - - await orchestrator.handleCallerUtterance('I need a reservation'); - - // Verify the orchestrator is now waiting - expect(orchestrator.getState()).toBe('waiting_on_user'); - - // Now provide the answer — set up mock for the LLM call after answer - mockStreamFn.mockImplementation(() => createMockStream(['Great, booking for tomorrow.'])); - - const result = await tryRouteCallMessage('conv-bridge', 'Tomorrow at noon'); - expect(result.handled).toBe(true); - - // Wait for the fire-and-forget LLM call - await new Promise((r) => setTimeout(r, 50)); - - // Verify the pending question was answered - const question = getPendingQuestion(callSession.id); - // After answering, there should be no pending question left - expect(question).toBeNull(); - - orchestrator.destroy(); - }); - - // ── tryRouteCallMessage — instruction path ──────────────────── - - test('routes instruction to orchestrator when active call exists with no pending question', async () => { - ensureConversation('conv-instruct'); - const callSession = createCallSession({ - conversationId: 'conv-instruct', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - - const relay = createMockRelay(); - const orchestrator = new CallOrchestrator(callSession.id, relay as unknown as RelayConnection, 'test task'); - - const result = await tryRouteCallMessage('conv-instruct', 'Please ask about pricing'); - expect(result.handled).toBe(true); - expect(result.userFacingText).toBe('Instruction relayed to active call.'); - - // Verify 
acknowledgement was persisted - const msgs = getMessagesForConversation('conv-instruct'); - const ackMsg = msgs.find((m) => m.content.includes('Instruction relayed')); - expect(ackMsg).toBeDefined(); - expect(ackMsg!.role).toBe('assistant'); - - orchestrator.destroy(); - }); - - test('prefers answer path over instruction path when pending question exists', async () => { - // Setup: trigger ASK_GUARDIAN to put orchestrator in waiting_on_user state - mockStreamFn.mockImplementation(() => - createMockStream(['Hold on. [ASK_GUARDIAN: Budget range?]']), - ); - - ensureConversation('conv-prefer-answer'); - const callSession = createCallSession({ - conversationId: 'conv-prefer-answer', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - - const relay = createMockRelay(); - const orchestrator = new CallOrchestrator(callSession.id, relay as unknown as RelayConnection, 'test task'); - - await orchestrator.handleCallerUtterance('What is your budget?'); - expect(orchestrator.getState()).toBe('waiting_on_user'); - - // Mock the next LLM call - mockStreamFn.mockImplementation(() => createMockStream(['Got it, thanks.'])); - - // This should route as answer, not instruction - const result = await tryRouteCallMessage('conv-prefer-answer', '$500'); - expect(result.handled).toBe(true); - - // Wait for fire-and-forget LLM call - await new Promise((r) => setTimeout(r, 50)); - - // Should have answered the pending question, not relayed as instruction - const question = getPendingQuestion(callSession.id); - expect(question).toBeNull(); - - // No instruction acknowledgement should be persisted - const msgs = getMessagesForConversation('conv-prefer-answer'); - const ackMsg = msgs.find((m) => m.content.includes('Instruction relayed')); - expect(ackMsg).toBeUndefined(); - - orchestrator.destroy(); - }); - - test('instruction relay failure persists notice and is consumed (handled:true)', async () => { - ensureConversation('conv-no-orch-instruct'); - 
createCallSession({ - conversationId: 'conv-no-orch-instruct', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - - // No orchestrator registered — relay should fail but still be consumed - const result = await tryRouteCallMessage('conv-no-orch-instruct', 'Change the topic'); - expect(result.handled).toBe(true); - expect(result.reason).toBe('instruction_relay_failed'); - expect(result.userFacingText).toBe('Failed to relay instruction to the active call.'); - - // Verify failure notice was persisted in-thread - const msgs = getMessagesForConversation('conv-no-orch-instruct'); - const failMsg = msgs.find((m) => m.content.includes('Failed to relay')); - expect(failMsg).toBeDefined(); - expect(failMsg!.role).toBe('assistant'); - }); - - // ── Call question notifier ────────────────────────────────────── - - test('call question notifier persists assistant message and emits events', () => { - ensureConversation('conv-notifier-q'); - - const emittedEvents: Array<{ type: string; text?: string }> = []; - const sendToClient = (msg: { type: string; text?: string }) => { - emittedEvents.push(msg); - }; - - // Register notifier (as Session would) - registerCallQuestionNotifier('conv-notifier-q', (_callSessionId: string, question: string) => { - const questionText = `**Live call question**:\n\n${question}\n\n_Reply in this thread to answer._`; - conversationStore.addMessage( - 'conv-notifier-q', - 'assistant', - JSON.stringify([{ type: 'text', text: questionText }]), - ); - sendToClient({ type: 'assistant_text_delta', text: questionText }); - sendToClient({ type: 'message_complete' }); - }); - - // Fire the notifier - fireCallQuestionNotifier('conv-notifier-q', 'call-session-1', 'What time works best?'); - - // Verify message was persisted - const msgs = getMessagesForConversation('conv-notifier-q'); - expect(msgs.length).toBe(1); - expect(msgs[0].role).toBe('assistant'); - expect(msgs[0].content).toContain('What time works best?'); - - // 
Verify events were emitted - expect(emittedEvents.length).toBe(2); - expect(emittedEvents[0].type).toBe('assistant_text_delta'); - expect(emittedEvents[0].text).toContain('What time works best?'); - expect(emittedEvents[1].type).toBe('message_complete'); - - unregisterCallQuestionNotifier('conv-notifier-q'); - }); - - // ── Call transcript notifier ───────────────────────────────────── - - test('call transcript notifier persists transcript line and emits events', () => { - ensureConversation('conv-notifier-t'); - - const emittedEvents: Array<{ type: string; text?: string }> = []; - const sendToClient = (msg: { type: string; text?: string }) => { - emittedEvents.push(msg); - }; - - registerCallTranscriptNotifier('conv-notifier-t', (_callSessionId: string, speaker: 'caller' | 'assistant', text: string) => { - const speakerLabel = speaker === 'caller' ? 'Caller' : 'Assistant'; - const transcriptText = `**Live call transcript**\n${speakerLabel}: ${text}`; - conversationStore.addMessage( - 'conv-notifier-t', - 'assistant', - JSON.stringify([{ type: 'text', text: transcriptText }]), - ); - sendToClient({ type: 'assistant_text_delta', text: transcriptText }); - sendToClient({ type: 'message_complete' }); - }); - - fireCallTranscriptNotifier('conv-notifier-t', 'call-session-1', 'caller', 'Can you confirm the appointment?'); - - const msgs = getMessagesForConversation('conv-notifier-t'); - expect(msgs.length).toBe(1); - expect(msgs[0].role).toBe('assistant'); - expect(msgs[0].content).toContain('Caller: Can you confirm the appointment?'); - - expect(emittedEvents.length).toBe(2); - expect(emittedEvents[0].type).toBe('assistant_text_delta'); - expect(emittedEvents[0].text).toContain('Live call transcript'); - expect(emittedEvents[1].type).toBe('message_complete'); - - unregisterCallTranscriptNotifier('conv-notifier-t'); - }); - - // ── Call completion notifier ──────────────────────────────────── - - test('call completion notifier persists summary and emits events', () => { 
- ensureConversation('conv-notifier-c'); - - const emittedEvents: Array<{ type: string; text?: string }> = []; - const sendToClient = (msg: { type: string; text?: string }) => { - emittedEvents.push(msg); - }; - - // Create a call session so getCallSession works - const callSession = createCallSession({ - conversationId: 'conv-notifier-c', - provider: 'twilio', - fromNumber: '+15551111111', - toNumber: '+15552222222', - }); - updateCallSession(callSession.id, { status: 'completed', startedAt: Date.now() - 30000, endedAt: Date.now() }); - recordCallEvent(callSession.id, 'call_started', {}); - recordCallEvent(callSession.id, 'call_ended', {}); - - registerCallCompletionNotifier('conv-notifier-c', (_callSessionId: string) => { - const summaryText = `**Call completed**. Events recorded.`; - conversationStore.addMessage( - 'conv-notifier-c', - 'assistant', - JSON.stringify([{ type: 'text', text: summaryText }]), - ); - sendToClient({ type: 'assistant_text_delta', text: summaryText }); - sendToClient({ type: 'message_complete' }); - }); - - fireCallCompletionNotifier('conv-notifier-c', callSession.id); - - // Verify message persisted - const msgs = getMessagesForConversation('conv-notifier-c'); - expect(msgs.length).toBe(1); - expect(msgs[0].role).toBe('assistant'); - expect(msgs[0].content).toContain('Call completed'); - - // Verify events emitted - expect(emittedEvents.length).toBe(2); - expect(emittedEvents[0].type).toBe('assistant_text_delta'); - expect(emittedEvents[1].type).toBe('message_complete'); - - unregisterCallCompletionNotifier('conv-notifier-c'); - }); -}); diff --git a/assistant/src/__tests__/session-process-bridge.test.ts b/assistant/src/__tests__/session-process-bridge.test.ts deleted file mode 100644 index 76dc114c648..00000000000 --- a/assistant/src/__tests__/session-process-bridge.test.ts +++ /dev/null @@ -1,244 +0,0 @@ -import { describe, test, expect, beforeEach, mock } from 'bun:test'; -import { mkdtempSync } from 'node:fs'; -import { tmpdir } 
from 'node:os'; -import { join } from 'node:path'; - -const testDir = mkdtempSync(join(tmpdir(), 'session-process-bridge-test-')); - -// ── Platform + logger mocks ───────────────────────────────────────── - -mock.module('../util/platform.js', () => ({ - getDataDir: () => testDir, - isMacOS: () => process.platform === 'darwin', - isLinux: () => process.platform === 'linux', - isWindows: () => process.platform === 'win32', - getSocketPath: () => join(testDir, 'test.sock'), - getPidPath: () => join(testDir, 'test.pid'), - getDbPath: () => join(testDir, 'test.db'), - getLogPath: () => join(testDir, 'test.log'), - ensureDataDir: () => {}, -})); - -mock.module('../util/logger.js', () => ({ - getLogger: () => - new Proxy({} as Record, { - get: () => () => {}, - }), -})); - -mock.module('../config/loader.js', () => ({ - getConfig: () => ({ - apiKeys: { anthropic: 'test-key' }, - model: 'claude-sonnet-4-20250514', - provider: 'anthropic', - memory: { enabled: false }, - calls: { enabled: false }, - contextWindow: { maxInputTokens: 200000 }, - }), -})); - -// ── Mock the call bridge ───────────────────────────────────────────── - -import type { CallBridgeResult } from '../calls/call-bridge.js'; - -const mockTryRouteCallMessage = mock( - (_convId: string, _text: string, _msgId?: string): Promise => - Promise.resolve({ handled: false, reason: 'no_active_call' }), -); - -mock.module('../calls/call-bridge.js', () => ({ - tryRouteCallMessage: (...args: [string, string, string?]) => mockTryRouteCallMessage(...args), -})); - -// ── Mock slash resolution ──────────────────────────────────────────── - -mock.module('./session-slash.js', () => ({ - resolveSlash: (content: string) => ({ kind: 'passthrough' as const, content }), -})); - -// ── Import after mocks ────────────────────────────────────────────── - -import type { ServerMessage } from '../daemon/ipc-protocol.js'; -import type { ProcessSessionContext } from '../daemon/session-process.js'; -import { processMessage, drainQueue } 
from '../daemon/session-process.js'; -import { MessageQueue } from '../daemon/session-queue-manager.js'; - -// ── Session mock factory ───────────────────────────────────────────── - -function createMockSession(overrides?: Partial): ProcessSessionContext { - return { - conversationId: 'test-conv', - messages: [], - processing: false, - abortController: null, - currentRequestId: undefined, - queue: new MessageQueue(), - traceEmitter: { - emit: () => {}, - } as unknown as ProcessSessionContext['traceEmitter'], - usageStats: { inputTokens: 0, outputTokens: 0, estimatedCost: 0 }, - persistUserMessage: mock((_content: string, _attachments: unknown[], _requestId?: string) => 'mock-msg-id'), - runAgentLoop: mock(async () => {}), - ...overrides, - }; -} - -// ── Tests ──────────────────────────────────────────────────────────── - -describe('session-process bridge consumption', () => { - beforeEach(() => { - mockTryRouteCallMessage.mockReset(); - }); - - // ── Direct processMessage path ─────────────────────────────── - - test('processMessage emits assistant_text_delta + message_complete when bridge consumes with userFacingText', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: true, - userFacingText: 'Instruction relayed to active call.', - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession(); - - await processMessage(session, 'ask about pricing', [], onEvent); - - // Should have emitted text delta then message_complete - const textDelta = events.find((e) => e.type === 'assistant_text_delta'); - expect(textDelta).toBeDefined(); - expect((textDelta as { text: string }).text).toBe('Instruction relayed to active call.'); - - const complete = events.find((e) => e.type === 'message_complete'); - expect(complete).toBeDefined(); - - // Should NOT have called runAgentLoop - expect(session.runAgentLoop).not.toHaveBeenCalled(); - }); - - test('processMessage emits failure 
text when bridge consumes with failure userFacingText', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: true, - reason: 'instruction_relay_failed', - userFacingText: 'Failed to relay instruction to the active call.', - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession(); - - await processMessage(session, 'change the topic', [], onEvent); - - const textDelta = events.find((e) => e.type === 'assistant_text_delta'); - expect(textDelta).toBeDefined(); - expect((textDelta as { text: string }).text).toBe('Failed to relay instruction to the active call.'); - - const complete = events.find((e) => e.type === 'message_complete'); - expect(complete).toBeDefined(); - - // Only one message_complete - const completeCount = events.filter((e) => e.type === 'message_complete').length; - expect(completeCount).toBe(1); - - expect(session.runAgentLoop).not.toHaveBeenCalled(); - }); - - test('processMessage skips text delta when bridge consumes without userFacingText', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: true, - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession(); - - await processMessage(session, 'hello', [], onEvent); - - const textDelta = events.find((e) => e.type === 'assistant_text_delta'); - expect(textDelta).toBeUndefined(); - - const complete = events.find((e) => e.type === 'message_complete'); - expect(complete).toBeDefined(); - - expect(session.runAgentLoop).not.toHaveBeenCalled(); - }); - - test('processMessage falls through to agent loop when bridge does not consume', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: false, - reason: 'no_active_call', - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession(); - - await 
processMessage(session, 'normal message', [], onEvent); - - expect(session.runAgentLoop).toHaveBeenCalled(); - }); - - // ── Queued routeOrProcess path ─────────────────────────────── - - test('drainQueue emits assistant_text_delta + message_complete for bridge-consumed queued message', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: true, - userFacingText: 'Instruction relayed to active call.', - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession({ processing: true }); - - // Enqueue a message - session.queue.push({ - content: 'ask about pricing', - attachments: [], - requestId: 'req-1', - onEvent, - }); - - drainQueue(session); - - // Wait for async routeOrProcess - await new Promise((r) => setTimeout(r, 50)); - - const textDelta = events.find((e) => e.type === 'assistant_text_delta'); - expect(textDelta).toBeDefined(); - expect((textDelta as { text: string }).text).toBe('Instruction relayed to active call.'); - - // message_complete (from dequeue + bridge consumption — only one expected for this request) - const completeEvents = events.filter((e) => e.type === 'message_complete'); - expect(completeEvents.length).toBe(1); - - expect(session.runAgentLoop).not.toHaveBeenCalled(); - }); - - test('drainQueue emits failure text for bridge-consumed queued message with relay failure', async () => { - mockTryRouteCallMessage.mockResolvedValue({ - handled: true, - reason: 'instruction_relay_failed', - userFacingText: 'Failed to relay instruction to the active call.', - }); - - const events: ServerMessage[] = []; - const onEvent = (msg: ServerMessage) => events.push(msg); - const session = createMockSession({ processing: true }); - - session.queue.push({ - content: 'change the topic', - attachments: [], - requestId: 'req-2', - onEvent, - }); - - drainQueue(session); - await new Promise((r) => setTimeout(r, 50)); - - const textDelta = events.find((e) => e.type === 
'assistant_text_delta'); - expect(textDelta).toBeDefined(); - expect((textDelta as { text: string }).text).toBe('Failed to relay instruction to the active call.'); - - expect(session.runAgentLoop).not.toHaveBeenCalled(); - }); -}); diff --git a/assistant/src/calls/call-bridge.ts b/assistant/src/calls/call-bridge.ts deleted file mode 100644 index 300fff31cba..00000000000 --- a/assistant/src/calls/call-bridge.ts +++ /dev/null @@ -1,168 +0,0 @@ -/** - * Call message bridge: intercepts user messages in-thread and routes them - * to the live call orchestrator — either as answers to pending questions - * or as mid-call steering instructions. - * - * Decision priority: - * 1. If a pending question exists → answer path (existing behavior). - * 2. If no pending question but an active call exists → instruction path. - * - * When the bridge consumes a message it returns `{ handled: true }` so - * the caller can skip agent processing. - */ - -import { getLogger } from '../util/logger.js'; -import { - getActiveCallSessionForConversation, - getPendingQuestion, - answerPendingQuestion, - recordCallEvent, - getCallSession, -} from './call-store.js'; -import { getCallOrchestrator } from './call-state.js'; -import { relayInstruction } from './call-domain.js'; -import * as conversationStore from '../memory/conversation-store.js'; - -const log = getLogger('call-bridge'); - -export interface CallBridgeResult { - handled: boolean; - reason?: string; - /** User-facing text persisted in-thread by the bridge (success ack or failure notice). */ - userFacingText?: string; -} - -/** - * Attempt to route a user message to an active call — as an answer to - * a pending question (priority) or as a mid-call steering instruction. - * - * @param conversationId - The conversation the message belongs to. - * @param userText - The user's message text. - * @param _userMessageId - The persisted message ID (reserved for future use). 
- * @returns `{ handled: true }` if the message was consumed by the call system, - * `{ handled: false, reason }` otherwise. - */ -export async function tryRouteCallMessage( - conversationId: string, - userText: string, - _userMessageId?: string, -): Promise { - // 1. Find an active call for this conversation - const callSession = getActiveCallSessionForConversation(conversationId); - if (!callSession) { - return { handled: false, reason: 'no_active_call' }; - } - - // 2. Check for a pending question — answer path takes priority - const pendingQuestion = getPendingQuestion(callSession.id); - if (pendingQuestion) { - return handleAnswer(conversationId, callSession.id, pendingQuestion, userText); - } - - // 3. No pending question — instruction path - return handleInstruction(conversationId, callSession.id, userText); -} - -/** @deprecated Use `tryRouteCallMessage` instead. */ -export const tryHandlePendingCallAnswer = tryRouteCallMessage; - -// ── Answer path ───────────────────────────────────────────────────── - -async function handleAnswer( - conversationId: string, - callSessionId: string, - pendingQuestion: { id: string; questionText: string }, - userText: string, -): Promise { - // Empty text (e.g. attachment-only messages) should not be consumed as - // an answer — fall through to normal processing so attachments are handled. - if (!userText.trim()) { - return { handled: false, reason: 'empty_answer_text' }; - } - - const orchestrator = getCallOrchestrator(callSessionId); - if (!orchestrator) { - // The call may have ended between the question being asked and the - // user replying. Persist a follow-up message so the user knows. 
- const freshSession = getCallSession(callSessionId); - const ended = freshSession && (freshSession.status === 'completed' || freshSession.status === 'failed'); - if (ended) { - conversationStore.addMessage( - conversationId, - 'assistant', - JSON.stringify([{ - type: 'text', - text: 'The call ended before your answer could be relayed to the caller.', - }]), - ); - } - return { handled: false, reason: 'orchestrator_not_found' }; - } - - if (orchestrator.getState() !== 'waiting_on_user') { - return { handled: false, reason: 'orchestrator_not_waiting' }; - } - - const accepted = await orchestrator.handleUserAnswer(userText); - if (!accepted) { - return { handled: false, reason: 'orchestrator_rejected' }; - } - - answerPendingQuestion(pendingQuestion.id, userText); - recordCallEvent(callSessionId, 'user_answered', { answer: userText }); - - log.info( - { conversationId, callSessionId, questionId: pendingQuestion.id }, - 'User reply routed as call answer via bridge', - ); - - return { handled: true }; -} - -// ── Instruction path ──────────────────────────────────────────────── - -async function handleInstruction( - conversationId: string, - callSessionId: string, - userText: string, -): Promise { - // Empty text (e.g. attachment-only messages) should not be relayed — - // fall through to normal processing so attachments are handled. 
- if (!userText.trim()) { - return { handled: false, reason: 'empty_instruction_text' }; - } - - const result = await relayInstruction({ callSessionId, instructionText: userText }); - - if (!result.ok) { - log.warn( - { conversationId, callSessionId, error: result.error }, - 'Instruction relay failed via bridge', - ); - - const failureText = 'Failed to relay instruction to the active call.'; - conversationStore.addMessage( - conversationId, - 'assistant', - JSON.stringify([{ type: 'text', text: failureText }]), - ); - - // Consumed: caller should NOT fall through to the agent loop - return { handled: true, reason: 'instruction_relay_failed', userFacingText: failureText }; - } - - // Persist a concise acknowledgement so the user sees confirmation - const ackText = 'Instruction relayed to active call.'; - conversationStore.addMessage( - conversationId, - 'assistant', - JSON.stringify([{ type: 'text', text: ackText }]), - ); - - log.info( - { conversationId, callSessionId }, - 'User message routed as call instruction via bridge', - ); - - return { handled: true, userFacingText: ackText }; -} diff --git a/assistant/src/calls/call-domain.ts b/assistant/src/calls/call-domain.ts index a98ccec7509..ed029c07d89 100644 --- a/assistant/src/calls/call-domain.ts +++ b/assistant/src/calls/call-domain.ts @@ -28,6 +28,7 @@ import { VALID_CALLER_IDENTITY_MODES } from '../config/schema.js'; import type { AssistantConfig } from '../config/types.js'; import { getOrCreateConversation } from '../memory/conversation-key-store.js'; import { upsertBinding } from '../memory/external-conversation-store.js'; +import { addPointerMessage } from './call-pointer-messages.js'; const log = getLogger('call-domain'); @@ -262,6 +263,9 @@ export async function startCall(input: StartCallInput): Promise 0 ? 
formatDuration(durationMs) : undefined, + }); + } this.state = 'idle'; return; } @@ -569,6 +577,14 @@ export class CallOrchestrator { if (shouldNotifyCompletion && currentSession) { fireCallCompletionNotifier(currentSession.conversationId, this.callSessionId); } + + // Post a pointer message in the initiating conversation + if (currentSession?.initiatedFromConversationId) { + const durationMs = currentSession.startedAt ? Date.now() - currentSession.startedAt : 0; + addPointerMessage(currentSession.initiatedFromConversationId, 'completed', currentSession.toNumber, { + duration: durationMs > 0 ? formatDuration(durationMs) : undefined, + }); + } }, 3000); }, maxDurationMs); } diff --git a/assistant/src/calls/call-pointer-messages.ts b/assistant/src/calls/call-pointer-messages.ts new file mode 100644 index 00000000000..90448cd8e39 --- /dev/null +++ b/assistant/src/calls/call-pointer-messages.ts @@ -0,0 +1,50 @@ +/** + * Concise pointer/status messages posted to the initiating conversation + * so the user sees call lifecycle events without the full transcript + * (which lives in the dedicated voice conversation). + */ + +import * as conversationStore from '../memory/conversation-store.js'; + +export type PointerEvent = 'started' | 'completed' | 'failed'; + +export function addPointerMessage( + conversationId: string, + event: PointerEvent, + phoneNumber: string, + extra?: { duration?: string; reason?: string }, +): void { + let text: string; + switch (event) { + case 'started': + text = `\u{1F4DE} Call to ${phoneNumber} started. See voice thread for details.`; + break; + case 'completed': + text = extra?.duration + ? `\u{1F4DE} Call to ${phoneNumber} completed (${extra.duration}).` + : `\u{1F4DE} Call to ${phoneNumber} completed.`; + break; + case 'failed': + text = extra?.reason + ? 
`\u{1F4DE} Call to ${phoneNumber} failed: ${extra.reason}.` + : `\u{1F4DE} Call to ${phoneNumber} failed.`; + break; + } + + conversationStore.addMessage( + conversationId, + 'assistant', + JSON.stringify([{ type: 'text', text }]), + ); +} + +/** + * Format a duration in milliseconds into a human-friendly string. + */ +export function formatDuration(ms: number): string { + const totalSeconds = Math.round(ms / 1000); + if (totalSeconds < 60) return `${totalSeconds}s`; + const minutes = Math.floor(totalSeconds / 60); + const seconds = totalSeconds % 60; + return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`; +} diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index 734853b9fa8..706505f3eca 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -16,6 +16,7 @@ import { } from './call-store.js'; import { CallOrchestrator } from './call-orchestrator.js'; import { fireCallTranscriptNotifier, fireCallCompletionNotifier } from './call-state.js'; +import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; import { extractPromptSpeakerMetadata, SpeakerIdentityTracker, @@ -252,6 +253,14 @@ export class RelayConnection { reason: reason || 'relay_closed', closeCode: code, }); + + // Post a pointer message in the initiating conversation + if (session.initiatedFromConversationId) { + const durationMs = session.startedAt ? Date.now() - session.startedAt : 0; + addPointerMessage(session.initiatedFromConversationId, 'completed', session.toNumber, { + duration: durationMs > 0 ? formatDuration(durationMs) : undefined, + }); + } } else { const detail = reason || (code ? 
`relay_closed_${code}` : 'relay_closed_abnormal'); updateCallSession(this.callSessionId, { @@ -263,6 +272,13 @@ export class RelayConnection { reason: detail, closeCode: code, }); + + // Post a failure pointer message in the initiating conversation + if (session.initiatedFromConversationId) { + addPointerMessage(session.initiatedFromConversationId, 'failed', session.toNumber, { + reason: detail, + }); + } } expirePendingQuestions(this.callSessionId); diff --git a/assistant/src/config/bundled-skills/phone-calls/SKILL.md b/assistant/src/config/bundled-skills/phone-calls/SKILL.md index 35b6da64b67..a9c44056ffa 100644 --- a/assistant/src/config/bundled-skills/phone-calls/SKILL.md +++ b/assistant/src/config/bundled-skills/phone-calls/SKILL.md @@ -343,11 +343,11 @@ By default, always show the live transcript of the call as it happens. When a ca ### Interacting with a live call -During an active call, the user can type messages in the chat thread to interact with the AI voice agent in real time. Messages are automatically routed to the call via the call bridge, which decides how to handle them based on the call's current state: +During an active call, the user can interact with the AI voice agent via the HTTP API endpoints: -#### Mode 1: Answering questions +#### Answering questions -When the AI voice agent encounters something it needs user input for, a **pending question** appears in the chat. The call status changes to `waiting_on_user`. +When the AI voice agent encounters something it needs user input for, a **pending question** appears in the voice thread. The call status changes to `waiting_on_user`. 1. A **pending question** appears in `call_status` output 2. Present the question prominently to the user: @@ -357,23 +357,21 @@ When the AI voice agent encounters something it needs user input for, a **pendin "They're asking if you'd prefer the smoking or non-smoking section?" ``` -3. 
The user replies directly in the chat — since there is a pending question, the reply is automatically routed as an **answer** to the AI voice agent +3. Use the `call_answer` tool or the HTTP API (`POST /v1/calls/:id/answer`) to relay the answer to the AI voice agent 4. The AI voice agent receives the answer and continues the conversation naturally **Important:** Respond to pending questions quickly. There is a consultation timeout (default: 2 minutes). If no answer is provided in time, the AI voice agent will move on. -#### Mode 2: Steering with instructions +#### Steering with instructions -When there is **no pending question** but the call is still active, any message the user types in the chat is treated as a **steering instruction**. This lets the user proactively guide the call in real time — for example: +When there is **no pending question** but the call is still active, the user can send steering instructions via the HTTP API (`POST /v1/calls/:id/instruction`) to proactively guide the call in real time — for example: - "Ask them about their cancellation policy too" - "Wrap up the call, we have what we need" - "Switch to asking about weekend availability instead" - "Be more assertive about getting a discount" -The instruction is injected into the AI voice agent's conversation context as high-priority input, and the agent adjusts its behavior accordingly. A confirmation message ("Instruction relayed to active call.") appears in the chat thread. - -**The user does not need to do anything special** — just type a message. The system automatically determines whether it should be an answer or an instruction based on whether a question is pending. +The instruction is injected into the AI voice agent's conversation context as high-priority input, and the agent adjusts its behavior accordingly. 
### Call status values diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts index 9dea52aa072..e4ff29497bc 100644 --- a/assistant/src/daemon/server.ts +++ b/assistant/src/daemon/server.ts @@ -31,7 +31,6 @@ import { ensureBlobDir, sweepStaleBlobs } from './ipc-blob-store.js'; import { bootstrapHomeBaseAppLink } from '../home-base/bootstrap.js'; import { SessionEvictor } from './session-evictor.js'; import { getSubagentManager } from '../subagent/index.js'; -import { tryRouteCallMessage } from '../calls/call-bridge.js'; import { resolveSlash } from './session-slash.js'; import { createUserMessage, createAssistantMessage } from '../agent/message-types.js'; import { registerDaemonCallbacks } from '../work-items/work-item-runner.js'; @@ -697,21 +696,6 @@ export class DaemonServer { const requestId = crypto.randomUUID(); const messageId = session.persistUserMessage(content, attachments, requestId); - let bridgeHandled = false; - try { - const bridgeResult = await tryRouteCallMessage(conversationId, content, messageId); - bridgeHandled = bridgeResult.handled; - } catch (err) { - log.warn({ err, conversationId }, 'Call bridge check failed (non-fatal), proceeding with agent loop'); - } - - if (bridgeHandled) { - resetSessionProcessingState(session); - session.drainQueue('loop_complete'); - log.info({ conversationId, messageId }, 'User message consumed by call bridge, skipping agent loop'); - return { messageId }; - } - session.runAgentLoop(content, messageId, () => {}).catch((err) => { log.error({ err, conversationId }, 'Background agent loop failed'); }); @@ -787,22 +771,6 @@ export class DaemonServer { throw err; } - let bridgeHandled = false; - try { - const bridgeResult = await tryRouteCallMessage(conversationId, resolvedContent, messageId); - bridgeHandled = bridgeResult.handled; - } catch (err) { - log.warn({ err, conversationId }, 'Call bridge check failed (non-fatal), proceeding with agent loop'); - } - - if (bridgeHandled) { - (session as 
unknown as { preactivatedSkillIds?: string[] }).preactivatedSkillIds = undefined; - resetSessionProcessingState(session); - session.drainQueue('loop_complete'); - log.info({ conversationId, messageId }, 'User message consumed by call bridge, skipping agent loop'); - return { messageId }; - } - await session.runAgentLoop(resolvedContent, messageId, () => {}); return { messageId }; @@ -825,14 +793,3 @@ export class DaemonServer { } } - -function resetSessionProcessingState(session: Session): void { - const s = session as unknown as { - processing: boolean; - abortController: AbortController | null; - currentRequestId: string | undefined; - }; - s.processing = false; - s.abortController = null; - s.currentRequestId = undefined; -} diff --git a/assistant/src/daemon/session-notifiers.ts b/assistant/src/daemon/session-notifiers.ts index 067e98d1408..59af01bf1e3 100644 --- a/assistant/src/daemon/session-notifiers.ts +++ b/assistant/src/daemon/session-notifiers.ts @@ -95,7 +95,7 @@ export function registerSessionNotifiers( registerCallQuestionNotifier(conversationId, (callSessionId: string, question: string) => { const callSession = getCallSession(callSessionId); const callee = callSession?.toNumber ?? 'the caller'; - const questionText = `**Live call question** (to ${callee}):\n\n${question}\n\n_Reply in this thread to answer. Your next message will be treated as the answer to this question. 
Once answered, you can send messages to steer the conversation._`; + const questionText = `**Live call question** (to ${callee}):\n\n${question}\n\n_Use the call answer API to respond._`; conversationStore.addMessage( conversationId, diff --git a/assistant/src/daemon/session-process.ts b/assistant/src/daemon/session-process.ts index 9145383f32f..489ffd5e254 100644 --- a/assistant/src/daemon/session-process.ts +++ b/assistant/src/daemon/session-process.ts @@ -17,7 +17,6 @@ import * as conversationStore from '../memory/conversation-store.js'; import { resolveSlash, type SlashContext } from './session-slash.js'; import { getConfig } from '../config/loader.js'; import { getLogger } from '../util/logger.js'; -import { tryRouteCallMessage } from '../calls/call-bridge.js'; const log = getLogger('session-process'); @@ -197,49 +196,15 @@ export function drainQueue(session: ProcessSessionContext, reason: QueueDrainRea session.currentPage = next.currentPage; // Fire-and-forget: persistUserMessage set session.processing = true - // so subsequent messages will still be enqueued. Route through the call - // bridge first — if consumed, skip agent processing and continue draining. + // so subsequent messages will still be enqueued. // runAgentLoop's finally block will call drainQueue when this run completes. - routeOrProcess(session, resolvedContent, userMessageId, next).catch((err) => { + session.runAgentLoop(resolvedContent, userMessageId, next.onEvent).catch((err) => { const message = err instanceof Error ? err.message : String(err); log.error({ err, conversationId: session.conversationId, requestId: next.requestId }, 'Error processing queued message'); next.onEvent({ type: 'error', message: `Failed to process queued message: ${message}` }); }); } -/** - * Try the call bridge first; if not consumed, run the agent loop. - * Used by drainQueue to handle the async bridge check in fire-and-forget mode. 
- */ -async function routeOrProcess( - session: ProcessSessionContext, - content: string, - userMessageId: string, - next: { onEvent: (msg: ServerMessage) => void; requestId: string }, -): Promise { - try { - const bridgeResult = await tryRouteCallMessage(session.conversationId, content, userMessageId); - if (bridgeResult.handled) { - session.preactivatedSkillIds = undefined; - session.processing = false; - session.abortController = null; - session.currentRequestId = undefined; - log.info({ conversationId: session.conversationId, userMessageId }, 'Queued message consumed by call bridge, skipping agent loop'); - if (bridgeResult.userFacingText) { - next.onEvent({ type: 'assistant_text_delta', text: bridgeResult.userFacingText }); - } - next.onEvent({ type: 'message_complete', sessionId: session.conversationId }); - // runAgentLoop never ran so its finally block won't drain — continue manually - drainQueue(session); - return; - } - } catch (err) { - log.warn({ err, conversationId: session.conversationId }, 'Call bridge check failed (non-fatal), proceeding with agent loop'); - } - - await session.runAgentLoop(content, userMessageId, next.onEvent); -} - // ── processMessage ─────────────────────────────────────────────────── /** @@ -313,28 +278,6 @@ export async function processMessage( return ''; } - // Route through the call bridge before the agent loop. When the bridge - // consumes the message (answer or instruction), skip agent processing. 
- try { - const bridgeResult = await tryRouteCallMessage(session.conversationId, resolvedContent, userMessageId); - if (bridgeResult.handled) { - session.preactivatedSkillIds = undefined; - session.processing = false; - session.abortController = null; - session.currentRequestId = undefined; - log.info({ conversationId: session.conversationId, userMessageId }, 'IPC message consumed by call bridge, skipping agent loop'); - if (bridgeResult.userFacingText) { - onEvent({ type: 'assistant_text_delta', text: bridgeResult.userFacingText }); - } - onEvent({ type: 'message_complete', sessionId: session.conversationId }); - // runAgentLoop never ran so its finally block won't drain — continue manually - drainQueue(session); - return userMessageId; - } - } catch (err) { - log.warn({ err, conversationId: session.conversationId }, 'Call bridge check failed (non-fatal), proceeding with agent loop'); - } - await session.runAgentLoop(resolvedContent, userMessageId, onEvent); return userMessageId; } From 1ae8011fcd2a228a0ca02d7f6d1e1b5798fd111c Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:43:54 -0500 Subject: [PATCH 05/13] feat: DTMF callee verification for outbound voice calls (#7533) Co-authored-by: Claude --- assistant/src/calls/call-pointer-messages.ts | 6 +- assistant/src/calls/relay-server.ts | 147 ++++++++++++++++++- assistant/src/calls/types.ts | 2 +- assistant/src/config/schema.ts | 27 ++++ 4 files changed, 177 insertions(+), 5 deletions(-) diff --git a/assistant/src/calls/call-pointer-messages.ts b/assistant/src/calls/call-pointer-messages.ts index 90448cd8e39..8dfb751534c 100644 --- a/assistant/src/calls/call-pointer-messages.ts +++ b/assistant/src/calls/call-pointer-messages.ts @@ -12,12 +12,14 @@ export function addPointerMessage( conversationId: string, event: PointerEvent, phoneNumber: string, - extra?: { duration?: string; reason?: string }, + extra?: { duration?: string; reason?: string; verificationCode?: string }, ): void { let text: 
string; switch (event) { case 'started': - text = `\u{1F4DE} Call to ${phoneNumber} started. See voice thread for details.`; + text = extra?.verificationCode + ? `\u{1F4DE} Call to ${phoneNumber} started. Verification code: ${extra.verificationCode}` + : `\u{1F4DE} Call to ${phoneNumber} started. See voice thread for details.`; break; case 'completed': text = extra?.duration diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index 706505f3eca..dfb7eb4a460 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -7,7 +7,9 @@ */ import type { ServerWebSocket } from 'bun'; +import { randomInt } from 'node:crypto'; import { getLogger } from '../util/logger.js'; +import { getConfig } from '../config/loader.js'; import { getCallSession, updateCallSession, @@ -17,6 +19,7 @@ import { import { CallOrchestrator } from './call-orchestrator.js'; import { fireCallTranscriptNotifier, fireCallCompletionNotifier } from './call-state.js'; import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; +import * as conversationStore from '../memory/conversation-store.js'; import { extractPromptSpeakerMetadata, SpeakerIdentityTracker, @@ -111,6 +114,8 @@ export const activeRelayConnections = new Map(); /** * Manages a single WebSocket connection for one call. 
*/ +export type RelayConnectionState = 'connected' | 'verification_pending'; + export class RelayConnection { private ws: ServerWebSocket; private callSessionId: string; @@ -124,6 +129,14 @@ export class RelayConnection { private orchestrator: CallOrchestrator | null = null; private speakerIdentityTracker: SpeakerIdentityTracker; + // Verification state + private connectionState: RelayConnectionState = 'connected'; + private verificationCode: string | null = null; + private verificationAttempts = 0; + private verificationMaxAttempts = 3; + private verificationCodeLength = 6; + private dtmfBuffer = ''; + constructor(ws: ServerWebSocket, callSessionId: string) { this.ws = ws; this.callSessionId = callSessionId; @@ -132,6 +145,13 @@ export class RelayConnection { this.speakerIdentityTracker = new SpeakerIdentityTracker(); } + /** + * Get the verification code for this connection (if verification is active). + */ + getVerificationCode(): string | null { + return this.verificationCode; + } + /** * Handle an inbound message from Twilio via the ConversationRelay WebSocket. */ @@ -318,8 +338,60 @@ export class RelayConnection { // Create and attach the LLM-driven orchestrator const orchestrator = new CallOrchestrator(this.callSessionId, this, session?.task ?? null); this.setOrchestrator(orchestrator); - orchestrator.startInitialGreeting().catch((err) => - log.error({ err, callSessionId: this.callSessionId }, 'Failed to start initial outbound greeting'), + + // Check if callee verification is enabled + const config = getConfig(); + const verificationConfig = config.calls.verification; + if (verificationConfig.enabled) { + this.startVerification(session, verificationConfig); + } else { + orchestrator.startInitialGreeting().catch((err) => + log.error({ err, callSessionId: this.callSessionId }, 'Failed to start initial outbound greeting'), + ); + } + } + + /** + * Generate a verification code and prompt the callee to enter it via DTMF. 
+ */ + private startVerification( + session: ReturnType, + verificationConfig: { maxAttempts: number; codeLength: number }, + ): void { + this.verificationMaxAttempts = verificationConfig.maxAttempts; + this.verificationCodeLength = verificationConfig.codeLength; + this.verificationAttempts = 0; + this.dtmfBuffer = ''; + + // Generate a random numeric code + const maxValue = Math.pow(10, this.verificationCodeLength); + const code = randomInt(0, maxValue).toString().padStart(this.verificationCodeLength, '0'); + this.verificationCode = code; + this.connectionState = 'verification_pending'; + + recordCallEvent(this.callSessionId, 'callee_verification_started', { + codeLength: this.verificationCodeLength, + maxAttempts: this.verificationMaxAttempts, + }); + + // Send a TTS prompt with the code spoken digit by digit + const spokenCode = code.split('').join('. '); + this.sendTextToken(`Please enter the verification code: ${spokenCode}.`, true); + + // Post the verification code to the initiating conversation so the + // guardian (user) can share it with the callee. + if (session?.initiatedFromConversationId) { + const codeMsg = `\u{1F510} Verification code for call to ${session.toNumber}: ${code}`; + conversationStore.addMessage( + session.initiatedFromConversationId, + 'assistant', + JSON.stringify([{ type: 'text', text: codeMsg }]), + ); + } + + log.info( + { callSessionId: this.callSessionId, codeLength: this.verificationCodeLength }, + 'Callee verification started', ); } @@ -329,6 +401,13 @@ export class RelayConnection { return; } + // During verification, ignore voice prompts — the callee should be + // entering DTMF digits, not speaking. 
+ if (this.connectionState === 'verification_pending') { + log.debug({ callSessionId: this.callSessionId }, 'Ignoring voice prompt during verification'); + return; + } + log.info( { callSessionId: this.callSessionId, transcript: msg.voicePrompt, lang: msg.lang }, 'Caller transcript received (final)', @@ -394,6 +473,70 @@ export class RelayConnection { recordCallEvent(this.callSessionId, 'caller_spoke', { dtmfDigit: msg.digit, }); + + // If verification is pending, accumulate digits and check the code + if (this.connectionState === 'verification_pending' && this.verificationCode) { + this.dtmfBuffer += msg.digit; + + if (this.dtmfBuffer.length >= this.verificationCodeLength) { + const enteredCode = this.dtmfBuffer.slice(0, this.verificationCodeLength); + this.dtmfBuffer = ''; + + if (enteredCode === this.verificationCode) { + // Verification succeeded + this.connectionState = 'connected'; + this.verificationCode = null; + this.verificationAttempts = 0; + + recordCallEvent(this.callSessionId, 'callee_verification_succeeded', {}); + log.info({ callSessionId: this.callSessionId }, 'Callee verification succeeded'); + + // Proceed to the normal call flow + if (this.orchestrator) { + this.orchestrator.startInitialGreeting().catch((err) => + log.error({ err, callSessionId: this.callSessionId }, 'Failed to start initial outbound greeting after verification'), + ); + } + } else { + // Verification failed for this attempt + this.verificationAttempts++; + + if (this.verificationAttempts >= this.verificationMaxAttempts) { + // Max attempts reached — end the call + recordCallEvent(this.callSessionId, 'callee_verification_failed', { + attempts: this.verificationAttempts, + }); + log.warn({ callSessionId: this.callSessionId, attempts: this.verificationAttempts }, 'Callee verification failed — max attempts reached'); + + this.sendTextToken('Verification failed. 
Goodbye.', true); + + // End the call with failed status after TTS plays + setTimeout(() => { + this.endSession('Verification failed'); + updateCallSession(this.callSessionId, { + status: 'failed', + endedAt: Date.now(), + lastError: 'Callee verification failed — max attempts exceeded', + }); + + const session = getCallSession(this.callSessionId); + if (session?.initiatedFromConversationId) { + addPointerMessage(session.initiatedFromConversationId, 'failed', session.toNumber, { + reason: 'Callee verification failed', + }); + } + }, 2000); + } else { + // Allow another attempt + log.info( + { callSessionId: this.callSessionId, attempt: this.verificationAttempts, maxAttempts: this.verificationMaxAttempts }, + 'Callee verification attempt failed — retrying', + ); + this.sendTextToken('That code was incorrect. Please try again.', true); + } + } + } + } } private handleError(msg: RelayErrorMessage): void { diff --git a/assistant/src/calls/types.ts b/assistant/src/calls/types.ts index 2dd381ae503..e79cfd87b3c 100644 --- a/assistant/src/calls/types.ts +++ b/assistant/src/calls/types.ts @@ -1,5 +1,5 @@ export type CallStatus = 'initiated' | 'ringing' | 'in_progress' | 'waiting_on_user' | 'completed' | 'failed' | 'cancelled'; -export type CallEventType = 'call_started' | 'call_connected' | 'caller_spoke' | 'assistant_spoke' | 'user_question_asked' | 'user_answered' | 'user_instruction_relayed' | 'call_ended' | 'call_failed'; +export type CallEventType = 'call_started' | 'call_connected' | 'caller_spoke' | 'assistant_spoke' | 'user_question_asked' | 'user_answered' | 'user_instruction_relayed' | 'call_ended' | 'call_failed' | 'callee_verification_started' | 'callee_verification_succeeded' | 'callee_verification_failed'; export type PendingQuestionStatus = 'pending' | 'answered' | 'expired' | 'cancelled'; export interface CallSession { diff --git a/assistant/src/config/schema.ts b/assistant/src/config/schema.ts index ba2742b08cd..1b70d5830af 100644 --- 
a/assistant/src/config/schema.ts +++ b/assistant/src/config/schema.ts @@ -999,6 +999,22 @@ export const CallerIdentityConfigSchema = z.object({ .optional(), }); +export const CallsVerificationConfigSchema = z.object({ + enabled: z + .boolean({ error: 'calls.verification.enabled must be a boolean' }) + .default(false), + maxAttempts: z + .number({ error: 'calls.verification.maxAttempts must be a number' }) + .int('calls.verification.maxAttempts must be an integer') + .positive('calls.verification.maxAttempts must be a positive integer') + .default(3), + codeLength: z + .number({ error: 'calls.verification.codeLength must be a number' }) + .int('calls.verification.codeLength must be an integer') + .positive('calls.verification.codeLength must be a positive integer') + .default(6), +}); + export const CallsConfigSchema = z.object({ enabled: z .boolean({ error: 'calls.enabled must be a boolean' }) @@ -1050,6 +1066,11 @@ export const CallsConfigSchema = z.object({ callerIdentity: CallerIdentityConfigSchema.default({ allowPerCallOverride: true, }), + verification: CallsVerificationConfigSchema.default({ + enabled: false, + maxAttempts: 3, + codeLength: 6, + }), }); export const SkillsConfigSchema = z.object({ @@ -1371,6 +1392,11 @@ export const AssistantConfigSchema = z.object({ callerIdentity: { allowPerCallOverride: true, }, + verification: { + enabled: false, + maxAttempts: 3, + codeLength: 6, + }, }), sms: SmsConfigSchema.default({ enabled: false, @@ -1441,5 +1467,6 @@ export type CallsSafetyConfig = z.infer; export type CallsVoiceConfig = z.infer; export type CallsElevenLabsConfig = z.infer; export type CallerIdentityConfig = z.infer; +export type CallsVerificationConfig = z.infer; export type SmsConfig = z.infer; export type IngressConfig = z.infer; From 5c0981c7552812ad23d40924cb884ff15e24123a Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:49:16 -0500 Subject: [PATCH 06/13] feat: cross-channel guardian data model, store, and dispatch (#7534) 
Co-authored-by: Claude --- assistant/src/calls/call-orchestrator.ts | 21 +- assistant/src/calls/guardian-dispatch.ts | 171 +++++++++ .../src/daemon/ipc-contract-inventory.json | 2 + assistant/src/daemon/ipc-contract.ts | 10 + assistant/src/memory/db-init.ts | 3 + assistant/src/memory/guardian-action-store.ts | 358 ++++++++++++++++++ assistant/src/memory/schema-migration.ts | 58 +++ assistant/src/memory/schema.ts | 45 +++ .../vellum-assistant/App/AppDelegate.swift | 16 + .../Features/MainWindow/ThreadManager.swift | 21 + clients/shared/IPC/DaemonClient.swift | 3 + clients/shared/IPC/DaemonMessageRouter.swift | 2 + .../IPC/Generated/IPCContractGenerated.swift | 17 + clients/shared/IPC/IPCMessages.swift | 4 + 14 files changed, 729 insertions(+), 2 deletions(-) create mode 100644 assistant/src/calls/guardian-dispatch.ts create mode 100644 assistant/src/memory/guardian-action-store.ts diff --git a/assistant/src/calls/call-orchestrator.ts b/assistant/src/calls/call-orchestrator.ts index 8d3bcc6839e..a55526d6a1a 100644 --- a/assistant/src/calls/call-orchestrator.ts +++ b/assistant/src/calls/call-orchestrator.ts @@ -22,6 +22,8 @@ import type { RelayConnection } from './relay-server.js'; import { registerCallOrchestrator, unregisterCallOrchestrator, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js'; import type { PromptSpeakerContext } from './speaker-identification.js'; import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; +import { dispatchGuardianQuestion } from './guardian-dispatch.js'; +import type { ServerMessage } from '../daemon/ipc-contract.js'; const log = getLogger('call-orchestrator'); @@ -64,11 +66,17 @@ export class CallOrchestrator { private initialGreetingStarted = false; /** Monotonic run id used to suppress stale turn side effects after interruption. */ private llmRunVersion = 0; + /** Optional broadcast function for emitting IPC events to connected clients. 
*/ + private broadcast?: (msg: ServerMessage) => void; + /** Assistant identity for scoping guardian bindings. */ + private assistantId: string; - constructor(callSessionId: string, relay: RelayConnection, task: string | null) { + constructor(callSessionId: string, relay: RelayConnection, task: string | null, opts?: { broadcast?: (msg: ServerMessage) => void; assistantId?: string }) { this.callSessionId = callSessionId; this.relay = relay; this.task = task; + this.broadcast = opts?.broadcast; + this.assistantId = opts?.assistantId ?? 'self'; this.startDurationTimer(); this.resetSilenceTimer(); registerCallOrchestrator(callSessionId, this); @@ -417,7 +425,7 @@ export class CallOrchestrator { const askMatch = responseText.match(ASK_GUARDIAN_CAPTURE_REGEX); if (askMatch) { const questionText = askMatch[1]; - createPendingQuestion(this.callSessionId, questionText); + const pendingQuestion = createPendingQuestion(this.callSessionId, questionText); this.state = 'waiting_on_user'; updateCallSession(this.callSessionId, { status: 'waiting_on_user' }); recordCallEvent(this.callSessionId, 'user_question_asked', { question: questionText }); @@ -426,6 +434,15 @@ export class CallOrchestrator { const session = getCallSession(this.callSessionId); if (session) { fireCallQuestionNotifier(session.conversationId, this.callSessionId, questionText); + + // Dispatch guardian action request to all configured channels + void dispatchGuardianQuestion({ + callSessionId: this.callSessionId, + conversationId: session.conversationId, + assistantId: this.assistantId, + pendingQuestion, + broadcast: this.broadcast, + }); } // Set a consultation timeout diff --git a/assistant/src/calls/guardian-dispatch.ts b/assistant/src/calls/guardian-dispatch.ts new file mode 100644 index 00000000000..f286f7ec991 --- /dev/null +++ b/assistant/src/calls/guardian-dispatch.ts @@ -0,0 +1,171 @@ +/** + * Guardian dispatch engine for cross-channel voice calls. 
+ * + * When a call orchestrator detects ASK_GUARDIAN, this module: + * 1. Creates a guardian_action_request + * 2. Determines delivery destinations (telegram, sms, mac) + * 3. Creates guardian_action_delivery rows for each destination + * 4. Sends HTTP POST to gateway for external channels + * 5. Emits IPC events for the mac channel + */ + +import { getLogger } from '../util/logger.js'; +import { getActiveBinding } from '../memory/channel-guardian-store.js'; +import { + createGuardianActionRequest, + createGuardianActionDelivery, + updateDeliveryStatus, +} from '../memory/guardian-action-store.js'; +import { deliverChannelReply } from '../runtime/gateway-client.js'; +import { getUserConsultationTimeoutMs } from './call-constants.js'; +import type { CallPendingQuestion } from './types.js'; +import type { ServerMessage } from '../daemon/ipc-contract.js'; + +const log = getLogger('guardian-dispatch'); + +/** Resolve the gateway base URL for internal delivery callbacks. */ +function getGatewayBaseUrl(): string { + if (process.env.GATEWAY_INTERNAL_BASE_URL) { + return process.env.GATEWAY_INTERNAL_BASE_URL.replace(/\/+$/, ''); + } + const port = Number(process.env.GATEWAY_PORT) || 7830; + return `http://127.0.0.1:${port}`; +} + +export interface GuardianDispatchParams { + callSessionId: string; + conversationId: string; + assistantId: string; + pendingQuestion: CallPendingQuestion; + /** Broadcast function to emit IPC events to connected clients. */ + broadcast?: (msg: ServerMessage) => void; +} + +/** + * Dispatch a guardian action request to all configured channels. + * Fire-and-forget: errors are logged but do not propagate. 
+ */ +export async function dispatchGuardianQuestion(params: GuardianDispatchParams): Promise { + const { + callSessionId, + conversationId, + assistantId, + pendingQuestion, + broadcast, + } = params; + + try { + const expiresAt = Date.now() + getUserConsultationTimeoutMs(); + + // Create the action request + const request = createGuardianActionRequest({ + assistantId, + kind: 'ask_guardian', + sourceChannel: 'voice', + sourceConversationId: conversationId, + callSessionId, + pendingQuestionId: pendingQuestion.id, + questionText: pendingQuestion.questionText, + expiresAt, + }); + + log.info( + { requestId: request.id, requestCode: request.requestCode, callSessionId }, + 'Created guardian action request', + ); + + // Determine delivery destinations + const destinations: Array<{ + channel: string; + chatId?: string; + externalUserId?: string; + }> = []; + + // Telegram guardian binding + const telegramBinding = getActiveBinding(assistantId, 'telegram'); + if (telegramBinding) { + destinations.push({ + channel: 'telegram', + chatId: telegramBinding.guardianDeliveryChatId, + externalUserId: telegramBinding.guardianExternalUserId, + }); + } + + // SMS guardian binding + const smsBinding = getActiveBinding(assistantId, 'sms'); + if (smsBinding) { + destinations.push({ + channel: 'sms', + chatId: smsBinding.guardianDeliveryChatId, + externalUserId: smsBinding.guardianExternalUserId, + }); + } + + // Mac (internal) delivery — always created + destinations.push({ channel: 'mac' }); + + // Create delivery rows and dispatch + for (const dest of destinations) { + const delivery = createGuardianActionDelivery({ + requestId: request.id, + destinationChannel: dest.channel, + destinationChatId: dest.chatId, + destinationExternalUserId: dest.externalUserId, + }); + + if (dest.channel === 'mac') { + // Emit IPC event for the mac client + if (broadcast) { + broadcast({ + type: 'guardian_request_thread_created', + conversationId, + requestId: request.id, + callSessionId, + title: 
`Guardian question: ${pendingQuestion.questionText.slice(0, 80)}`, + } as ServerMessage); + } + updateDeliveryStatus(delivery.id, 'sent'); + log.info({ deliveryId: delivery.id, channel: 'mac' }, 'Mac guardian delivery emitted'); + } else { + // External channel — POST to gateway + void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId); + } + } + } catch (err) { + log.error({ err, callSessionId }, 'Failed to dispatch guardian question'); + } +} + +async function deliverToExternalChannel( + deliveryId: string, + channel: string, + chatId: string, + questionText: string, + requestCode: string, + assistantId: string, +): Promise { + const gatewayBase = getGatewayBaseUrl(); + const deliverUrl = `${gatewayBase}/deliver/${channel}`; + + const messageText = [ + `Your assistant needs your input during a phone call.`, + ``, + `Question: ${questionText}`, + ``, + `Reply to this message with your answer. (ref: ${requestCode})`, + ].join('\n'); + + try { + await deliverChannelReply(deliverUrl, { + chatId, + text: messageText, + assistantId, + }); + updateDeliveryStatus(deliveryId, 'sent'); + log.info({ deliveryId, channel, chatId }, 'External guardian delivery sent'); + } catch (err) { + const errorMsg = err instanceof Error ? 
err.message : String(err); + updateDeliveryStatus(deliveryId, 'failed', errorMsg); + log.error({ err, deliveryId, channel, chatId }, 'External guardian delivery failed'); + } +} diff --git a/assistant/src/daemon/ipc-contract-inventory.json b/assistant/src/daemon/ipc-contract-inventory.json index 723811779cf..17cb10c4787 100644 --- a/assistant/src/daemon/ipc-contract-inventory.json +++ b/assistant/src/daemon/ipc-contract-inventory.json @@ -161,6 +161,7 @@ "GenerationCancelled", "GenerationHandoff", "GetSigningIdentityRequest", + "GuardianRequestThreadCreated", "GuardianVerificationResponse", "HistoryResponse", "HomeBaseGetResponse", @@ -415,6 +416,7 @@ "generation_cancelled", "generation_handoff", "get_signing_identity", + "guardian_request_thread_created", "guardian_verification_response", "history_response", "home_base_get_response", diff --git a/assistant/src/daemon/ipc-contract.ts b/assistant/src/daemon/ipc-contract.ts index 1c34386d855..e1413121966 100644 --- a/assistant/src/daemon/ipc-contract.ts +++ b/assistant/src/daemon/ipc-contract.ts @@ -2382,6 +2382,15 @@ export interface TaskRunThreadCreated { title: string; } +/** Server push — broadcast when a guardian action request creates a thread for the mac channel. 
*/ +export interface GuardianRequestThreadCreated { + type: 'guardian_request_thread_created'; + conversationId: string; + requestId: string; + callSessionId: string; + title: string; +} + // === Workspace File Responses ──────────────────────────────────────────────── export interface WorkspaceFilesListResponse { @@ -2588,6 +2597,7 @@ export type ServerMessage = | WorkItemCancelResponse | WorkItemStatusChanged | TaskRunThreadCreated + | GuardianRequestThreadCreated | TasksChanged | OpenTasksWindow | SubagentSpawned diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts index 47560eb14bc..8218d877c81 100644 --- a/assistant/src/memory/db-init.ts +++ b/assistant/src/memory/db-init.ts @@ -14,6 +14,7 @@ import { migrateCallSessionsProviderSidDedup, migrateCallSessionsAddInitiatedFrom, migrateMemoryFtsBackfill, + migrateGuardianActionTables, } from './schema-migration.js'; const log = getLogger('memory-db'); @@ -1166,5 +1167,7 @@ export function initializeDb(): void { database.run(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_media_event_feedback_event_id ON media_event_feedback(event_id)`); database.run(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_media_event_feedback_type ON media_event_feedback(asset_id, feedback_type)`); + migrateGuardianActionTables(database); + migrateMemoryFtsBackfill(database); } diff --git a/assistant/src/memory/guardian-action-store.ts b/assistant/src/memory/guardian-action-store.ts new file mode 100644 index 00000000000..ec655b8026f --- /dev/null +++ b/assistant/src/memory/guardian-action-store.ts @@ -0,0 +1,358 @@ +/** + * Store for cross-channel guardian action requests and deliveries. + * + * Guardian action requests are created when a voice call's ASK_GUARDIAN + * marker fires, and deliveries track per-channel dispatch (telegram, sms, mac). + * Resolution uses first-response-wins semantics: the first channel to + * answer resolves the request and all other deliveries are marked answered. 
+ */ + +import { and, eq } from 'drizzle-orm'; +import { v4 as uuid } from 'uuid'; +import { getDb } from './db.js'; +import { + guardianActionRequests, + guardianActionDeliveries, +} from './schema.js'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +/** Lifecycle states of a guardian action request. */ +export type GuardianActionRequestStatus = 'pending' | 'answered' | 'expired' | 'cancelled'; +/** Lifecycle states of a single per-channel delivery. */ +export type GuardianActionDeliveryStatus = 'pending' | 'sent' | 'failed' | 'answered' | 'expired' | 'cancelled'; + +/** A guardian question raised by a call, together with its resolution state. Timestamps are Date.now() values. */ +export interface GuardianActionRequest { + id: string; + assistantId: string; + kind: string; + sourceChannel: string; + sourceConversationId: string; + callSessionId: string; + pendingQuestionId: string; + questionText: string; + requestCode: string; + status: GuardianActionRequestStatus; + answerText: string | null; + answeredByChannel: string | null; + answeredByExternalUserId: string | null; + answeredAt: number | null; + expiresAt: number; + createdAt: number; + updatedAt: number; +} + +/** One dispatch of a request to a single destination channel. */ +export interface GuardianActionDelivery { + id: string; + requestId: string; + destinationChannel: string; + destinationConversationId: string | null; + destinationChatId: string | null; + destinationExternalUserId: string | null; + status: GuardianActionDeliveryStatus; + sentAt: number | null; + respondedAt: number | null; + lastError: string | null; + createdAt: number; + updatedAt: number; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Copy a guardian_action_requests row into the public shape, narrowing the free-text status column to its union type. */ +function rowToRequest(row: typeof guardianActionRequests.$inferSelect): GuardianActionRequest { + return { + id: row.id, + assistantId: row.assistantId, + kind: row.kind, + sourceChannel: row.sourceChannel, + sourceConversationId: row.sourceConversationId, + callSessionId: row.callSessionId, + pendingQuestionId: row.pendingQuestionId, 
questionText: row.questionText, + requestCode: row.requestCode, + status: row.status as GuardianActionRequestStatus, + answerText: row.answerText, + answeredByChannel: row.answeredByChannel, + answeredByExternalUserId: row.answeredByExternalUserId, + answeredAt: row.answeredAt, + expiresAt: row.expiresAt, + createdAt: row.createdAt, + updatedAt: row.updatedAt, + }; +} + +/** Copy a guardian_action_deliveries row into the public shape, narrowing the free-text status column to its union type. */ +function rowToDelivery(row: typeof guardianActionDeliveries.$inferSelect): GuardianActionDelivery { + return { + id: row.id, + requestId: row.requestId, + destinationChannel: row.destinationChannel, + destinationConversationId: row.destinationConversationId, + destinationChatId: row.destinationChatId, + destinationExternalUserId: row.destinationExternalUserId, + status: row.status as GuardianActionDeliveryStatus, + sentAt: row.sentAt, + respondedAt: row.respondedAt, + lastError: row.lastError, + createdAt: row.createdAt, + updatedAt: row.updatedAt, + }; +} + +/** + * Generate a short human-readable request code: the first 6 hex digits of a + * fresh UUID with dashes removed, upper-cased. Uniqueness is not checked here. + */ +function generateRequestCode(): string { + return uuid().replace(/-/g, '').slice(0, 6).toUpperCase(); +} + +// --------------------------------------------------------------------------- +// Guardian Action Requests +// --------------------------------------------------------------------------- + +/** + * Insert a new guardian action request in 'pending' state and return it. + * assistantId defaults to 'self'; the id and request code are generated here. + */ +export function createGuardianActionRequest(params: { + assistantId?: string; + kind: string; + sourceChannel: string; + sourceConversationId: string; + callSessionId: string; + pendingQuestionId: string; + questionText: string; + expiresAt: number; +}): GuardianActionRequest { + const db = getDb(); + const now = Date.now(); + const id = uuid(); + + const row = { + id, + assistantId: params.assistantId ?? 
'self', + kind: params.kind, + sourceChannel: params.sourceChannel, + sourceConversationId: params.sourceConversationId, + callSessionId: params.callSessionId, + pendingQuestionId: params.pendingQuestionId, + questionText: params.questionText, + requestCode: generateRequestCode(), + status: 'pending' as const, + answerText: null, + answeredByChannel: null, + answeredByExternalUserId: null, + answeredAt: null, + expiresAt: params.expiresAt, + createdAt: now, + updatedAt: now, + }; + + db.insert(guardianActionRequests).values(row).run(); + return rowToRequest(row); +} + +/** Fetch a request by its primary key; null when no such row exists. */ +export function getGuardianActionRequest(id: string): GuardianActionRequest | null { + const db = getDb(); + const row = db + .select() + .from(guardianActionRequests) + .where(eq(guardianActionRequests.id, id)) + .get(); + return row ? rowToRequest(row) : null; +} + +/** Fetch the request linked to a call pending question; null when none is found. */ +export function getByPendingQuestionId(questionId: string): GuardianActionRequest | null { + const db = getDb(); + const row = db + .select() + .from(guardianActionRequests) + .where(eq(guardianActionRequests.pendingQuestionId, questionId)) + .get(); + return row ? rowToRequest(row) : null; +} + +/** + * First-response-wins resolution. Checks that the request is still + * 'pending' before updating; returns the updated request on success + * or null if the request was already resolved. + * + * @param id - request to resolve + * @param answerText - the guardian's answer + * @param answeredByChannel - channel the answer arrived on + * @param answeredByExternalUserId - optional sender id; stored as null when omitted + */ +export function resolveGuardianActionRequest( + id: string, + answerText: string, + answeredByChannel: string, + answeredByExternalUserId?: string, +): GuardianActionRequest | null { + const db = getDb(); + const now = Date.now(); + + // Atomically check-and-update: the WHERE clause only matches while status is + // still 'pending', and RETURNING reports whether a row was actually changed. + const updated = db + .update(guardianActionRequests) + .set({ + status: 'answered', + answerText, + answeredByChannel, + answeredByExternalUserId: answeredByExternalUserId ?? 
null, + answeredAt: now, + updatedAt: now, + }) + .where( + and( + eq(guardianActionRequests.id, id), + eq(guardianActionRequests.status, 'pending'), + ), + ) + .returning({ id: guardianActionRequests.id }) + .all(); + + // No row changed: the id is unknown or the request is no longer 'pending'. + if (updated.length === 0) return null; + + // Mark all deliveries as 'answered' + db.update(guardianActionDeliveries) + .set({ status: 'answered', respondedAt: now, updatedAt: now }) + .where(eq(guardianActionDeliveries.requestId, id)) + .run(); + + return getGuardianActionRequest(id); +} + +/** + * Expire a guardian action request and all its deliveries. + */ +export function expireGuardianActionRequest(id: string): void { + const db = getDb(); + const now = Date.now(); + + // Only a request that is still 'pending' is moved to 'expired'. + db.update(guardianActionRequests) + .set({ status: 'expired', updatedAt: now }) + .where( + and( + eq(guardianActionRequests.id, id), + eq(guardianActionRequests.status, 'pending'), + ), + ) + .run(); + + // NOTE(review): only deliveries still in 'pending' are touched; a delivery already + // moved to 'sent' keeps that status. Confirm this matches "all its deliveries". + db.update(guardianActionDeliveries) + .set({ status: 'expired', updatedAt: now }) + .where( + and( + eq(guardianActionDeliveries.requestId, id), + eq(guardianActionDeliveries.status, 'pending'), + ), + ) + .run(); +} + +/** + * Cancel a guardian action request and all its deliveries. 
+ */ +export function cancelGuardianActionRequest(id: string): void { + const db = getDb(); + const now = Date.now(); + + // Only a request that is still 'pending' is moved to 'cancelled'. + db.update(guardianActionRequests) + .set({ status: 'cancelled', updatedAt: now }) + .where( + and( + eq(guardianActionRequests.id, id), + eq(guardianActionRequests.status, 'pending'), + ), + ) + .run(); + + // NOTE(review): only deliveries still in 'pending' are touched; a delivery already + // moved to 'sent' keeps that status. Confirm this matches "all its deliveries". + db.update(guardianActionDeliveries) + .set({ status: 'cancelled', updatedAt: now }) + .where( + and( + eq(guardianActionDeliveries.requestId, id), + eq(guardianActionDeliveries.status, 'pending'), + ), + ) + .run(); +} + +// --------------------------------------------------------------------------- +// Guardian Action Deliveries +// --------------------------------------------------------------------------- + +/** + * Insert a delivery row for one destination of a request, in 'pending' state, + * and return it. Omitted destination fields are stored as null. + */ +export function createGuardianActionDelivery(params: { + requestId: string; + destinationChannel: string; + destinationConversationId?: string; + destinationChatId?: string; + destinationExternalUserId?: string; +}): GuardianActionDelivery { + const db = getDb(); + const now = Date.now(); + const id = uuid(); + + const row = { + id, + requestId: params.requestId, + destinationChannel: params.destinationChannel, + destinationConversationId: params.destinationConversationId ?? null, + destinationChatId: params.destinationChatId ?? null, + destinationExternalUserId: params.destinationExternalUserId ?? null, + status: 'pending' as const, + sentAt: null, + respondedAt: null, + lastError: null, + createdAt: now, + updatedAt: now, + }; + + db.insert(guardianActionDeliveries).values(row).run(); + return rowToDelivery(row); +} + +/** + * Look up deliveries awaiting an answer for a specific destination: + * delivery status 'sent' on a request that is still 'pending'. + * Used by inbound message routing to match incoming answers to deliveries. 
+ */ +export function getPendingDeliveriesByDestination( + assistantId: string, + channel: string, + chatId: string, +): GuardianActionDelivery[] { + const db = getDb(); + + // Join deliveries with requests to filter by assistantId + const rows = db + .select({ + delivery: guardianActionDeliveries, + }) + .from(guardianActionDeliveries) + .innerJoin( + guardianActionRequests, + eq(guardianActionDeliveries.requestId, guardianActionRequests.id), + ) + .where( + and( + eq(guardianActionRequests.assistantId, assistantId), + eq(guardianActionRequests.status, 'pending'), + eq(guardianActionDeliveries.destinationChannel, channel), + eq(guardianActionDeliveries.destinationChatId, chatId), + eq(guardianActionDeliveries.status, 'sent'), + ), + ) + .all(); + + return rows.map((r) => rowToDelivery(r.delivery)); +} + +/** + * Set a delivery's status and bump updatedAt. Also stamps sentAt when the new + * status is 'sent', respondedAt when it is 'answered', and records lastError + * when an error string is supplied. + */ +export function updateDeliveryStatus( + deliveryId: string, + status: GuardianActionDeliveryStatus, + error?: string, +): void { + const db = getDb(); + const now = Date.now(); + + // Typed against the table's insert shape so a misspelled column name fails to compile. + const updates: Partial<typeof guardianActionDeliveries.$inferInsert> = { status, updatedAt: now }; + if (status === 'sent') updates.sentAt = now; + if (status === 'answered') updates.respondedAt = now; + if (error !== undefined) updates.lastError = error; + + db.update(guardianActionDeliveries) + .set(updates) + .where(eq(guardianActionDeliveries.id, deliveryId)) + .run(); +} diff --git a/assistant/src/memory/schema-migration.ts b/assistant/src/memory/schema-migration.ts index f8322fcc3e9..f1bb7ac5f4a 100644 --- a/assistant/src/memory/schema-migration.ts +++ b/assistant/src/memory/schema-migration.ts @@ -1000,3 +1000,61 @@ export function migrateCallSessionsAddInitiatedFrom(database: Db): void { // Column already exists — nothing to do. } } + +/** + * Create guardian_action_requests and guardian_action_deliveries tables + * for cross-channel voice guardian dispatch. + * + * Uses CREATE TABLE IF NOT EXISTS + CREATE INDEX IF NOT EXISTS for + * idempotency across restarts. 
+ */ +export function migrateGuardianActionTables(database: Db): void { + // The DDL below is executed through the underlying driver handle ($client), not the query builder. + const raw = (database as unknown as { $client: Database }).$client; + + raw.exec(/*sql*/ ` + CREATE TABLE IF NOT EXISTS guardian_action_requests ( + id TEXT PRIMARY KEY, + assistant_id TEXT NOT NULL DEFAULT 'self', + kind TEXT NOT NULL, + source_channel TEXT NOT NULL, + source_conversation_id TEXT NOT NULL, + call_session_id TEXT NOT NULL REFERENCES call_sessions(id) ON DELETE CASCADE, + pending_question_id TEXT NOT NULL REFERENCES call_pending_questions(id) ON DELETE CASCADE, + question_text TEXT NOT NULL, + request_code TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + answer_text TEXT, + answered_by_channel TEXT, + answered_by_external_user_id TEXT, + answered_at INTEGER, + expires_at INTEGER NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL + ) + `); + + raw.exec(/*sql*/ ` + CREATE TABLE IF NOT EXISTS guardian_action_deliveries ( + id TEXT PRIMARY KEY, + request_id TEXT NOT NULL REFERENCES guardian_action_requests(id) ON DELETE CASCADE, + destination_channel TEXT NOT NULL, + destination_conversation_id TEXT, + destination_chat_id TEXT, + destination_external_user_id TEXT, + status TEXT NOT NULL DEFAULT 'pending', + sent_at INTEGER, + responded_at INTEGER, + last_error TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL + ) + `); + + // Secondary indexes; IF NOT EXISTS keeps repeated runs harmless. + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_requests_status ON guardian_action_requests(status)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_requests_call_session ON guardian_action_requests(call_session_id)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_requests_pending_question ON guardian_action_requests(pending_question_id)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_requests_request_code ON guardian_action_requests(request_code)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS 
idx_guardian_action_deliveries_request_id ON guardian_action_deliveries(request_id)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_deliveries_status ON guardian_action_deliveries(status)`); + raw.exec(/*sql*/ `CREATE INDEX IF NOT EXISTS idx_guardian_action_deliveries_destination ON guardian_action_deliveries(destination_channel, destination_chat_id)`); +} diff --git a/assistant/src/memory/schema.ts b/assistant/src/memory/schema.ts index d4cc135de31..7e4c5ed526a 100644 --- a/assistant/src/memory/schema.ts +++ b/assistant/src/memory/schema.ts @@ -780,3 +780,48 @@ export const mediaEventFeedback = sqliteTable('media_event_feedback', { notes: text('notes'), createdAt: integer('created_at').notNull(), }); + +// ── Guardian Action Requests (cross-channel voice guardian) ────────── + +// NOTE: the same columns are also declared as raw DDL in migrateGuardianActionTables +// (schema-migration.ts); keep the two definitions in sync. +export const guardianActionRequests = sqliteTable('guardian_action_requests', { + id: text('id').primaryKey(), + assistantId: text('assistant_id').notNull().default('self'), + kind: text('kind').notNull(), // 'ask_guardian' + sourceChannel: text('source_channel').notNull(), // 'voice' + sourceConversationId: text('source_conversation_id').notNull(), + callSessionId: text('call_session_id') + .notNull() + .references(() => callSessions.id, { onDelete: 'cascade' }), + pendingQuestionId: text('pending_question_id') + .notNull() + .references(() => callPendingQuestions.id, { onDelete: 'cascade' }), + questionText: text('question_text').notNull(), + requestCode: text('request_code').notNull(), // short human-readable code for routing replies + status: text('status').notNull().default('pending'), // pending | answered | expired | cancelled + answerText: text('answer_text'), + answeredByChannel: text('answered_by_channel'), + answeredByExternalUserId: text('answered_by_external_user_id'), + answeredAt: integer('answered_at'), + expiresAt: integer('expires_at').notNull(), + createdAt: integer('created_at').notNull(), + updatedAt: integer('updated_at').notNull(), +}); + +// ── 
Guardian Action Deliveries (per-channel delivery tracking) ─────── + +// NOTE: the same columns are also declared as raw DDL in migrateGuardianActionTables +// (schema-migration.ts); keep the two definitions in sync. +export const guardianActionDeliveries = sqliteTable('guardian_action_deliveries', { + id: text('id').primaryKey(), + requestId: text('request_id') + .notNull() + .references(() => guardianActionRequests.id, { onDelete: 'cascade' }), + destinationChannel: text('destination_channel').notNull(), // 'telegram' | 'sms' | 'mac' + destinationConversationId: text('destination_conversation_id'), + destinationChatId: text('destination_chat_id'), + destinationExternalUserId: text('destination_external_user_id'), + status: text('status').notNull().default('pending'), // pending | sent | failed | answered | expired | cancelled + sentAt: integer('sent_at'), // stamped when the delivery is marked 'sent' + respondedAt: integer('responded_at'), // stamped when the delivery is marked 'answered' + lastError: text('last_error'), + createdAt: integer('created_at').notNull(), + updatedAt: integer('updated_at').notNull(), +}); diff --git a/clients/macos/vellum-assistant/App/AppDelegate.swift b/clients/macos/vellum-assistant/App/AppDelegate.swift index 135a3e018da..4025602d6f4 100644 --- a/clients/macos/vellum-assistant/App/AppDelegate.swift +++ b/clients/macos/vellum-assistant/App/AppDelegate.swift @@ -635,6 +635,22 @@ public final class AppDelegate: NSObject, NSApplicationDelegate { // Task run threads are no longer created automatically. Users can // opt in via the "Open in Chat" button in the task output view. + // Guardian request threads — created when a voice call's ASK_GUARDIAN dispatches + // a question to the mac channel so the user can see and respond in chat. 
+ daemonClient.onGuardianRequestThreadCreated = { [weak self] msg in + guard let self, !self.isAwaitingFirstLaunchReady else { return } + self.mainWindow?.threadManager.createGuardianRequestThread( + conversationId: msg.conversationId, + requestId: msg.requestId, + callSessionId: msg.callSessionId, + title: msg.title + ) + if let thread = self.mainWindow?.threadManager.threads.first(where: { $0.sessionId == msg.conversationId }) { + self.mainWindow?.threadManager.activeThreadId = thread.id + } + self.showMainWindow() + } + // Handle escalation: text_qa -> computer_use via computer_use_request_control daemonClient.onTaskRouted = { [weak self] routed in guard let self else { return } diff --git a/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift b/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift index b8f77df0d91..1feae8255f1 100644 --- a/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift +++ b/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift @@ -180,6 +180,27 @@ final class ThreadManager: ObservableObject, ThreadRestorerDelegate { log.info("Created task run thread \(thread.id) for conversation \(conversationId) (work item \(workItemId))") } + /// Create a visible thread bound to an existing guardian action request conversation. + /// Called when the daemon broadcasts `guardian_request_thread_created` so the user + /// can see and respond to guardian questions from a voice call. 
+ func createGuardianRequestThread(conversationId: String, requestId: String, callSessionId: String, title: String) { + // Avoid creating a duplicate thread if one already exists for this conversation + if threads.contains(where: { $0.sessionId == conversationId }) { + return + } + + let thread = ThreadModel(title: title, sessionId: conversationId) + let viewModel = makeViewModel() + viewModel.sessionId = conversationId + // Start the message loop so the view model receives streamed messages + viewModel.startMessageLoop() + + // The new thread goes to the front of the list; its view model is keyed by the thread's id. + threads.insert(thread, at: 0) + chatViewModels[thread.id] = viewModel + + // requestId and callSessionId are only used for this log line. + log.info("Created guardian request thread \(thread.id) for conversation \(conversationId) (request \(requestId), call \(callSessionId))") + } + func closeThread(id: UUID) { // No-op if only 1 thread remains guard threads.count > 1 else { return } diff --git a/clients/shared/IPC/DaemonClient.swift b/clients/shared/IPC/DaemonClient.swift index 67b361b4407..7053149e193 100644 --- a/clients/shared/IPC/DaemonClient.swift +++ b/clients/shared/IPC/DaemonClient.swift @@ -393,6 +393,9 @@ public final class DaemonClient: ObservableObject, DaemonClientProtocol { /// Called when a task run creates a conversation so the client can show it as a visible chat thread. public var onTaskRunThreadCreated: ((IPCTaskRunThreadCreated) -> Void)? + /// Called when a guardian action request creates a thread for the mac channel. + public var onGuardianRequestThreadCreated: ((IPCGuardianRequestThreadCreated) -> Void)? + /// Called when the daemon wants us to open/focus the tasks window. public var onOpenTasksWindow: (() -> Void)? 
diff --git a/clients/shared/IPC/DaemonMessageRouter.swift b/clients/shared/IPC/DaemonMessageRouter.swift index 265a58d93f8..8e980994f44 100644 --- a/clients/shared/IPC/DaemonMessageRouter.swift +++ b/clients/shared/IPC/DaemonMessageRouter.swift @@ -228,6 +228,8 @@ extension DaemonClient { onWorkItemCancelResponse?(msg) case .taskRunThreadCreated(let msg): onTaskRunThreadCreated?(msg) + case .guardianRequestThreadCreated(let msg): + onGuardianRequestThreadCreated?(msg) case .openTasksWindow: onOpenTasksWindow?() case .subagentSpawned(let msg): diff --git a/clients/shared/IPC/Generated/IPCContractGenerated.swift b/clients/shared/IPC/Generated/IPCContractGenerated.swift index 7edd856e5b2..5a3d70998ae 100644 --- a/clients/shared/IPC/Generated/IPCContractGenerated.swift +++ b/clients/shared/IPC/Generated/IPCContractGenerated.swift @@ -1596,6 +1596,23 @@ public struct IPCGetSigningIdentityResponse: Codable, Sendable { } } +/// Server push — broadcast when a guardian action request creates a thread for the mac channel. 
+public struct IPCGuardianRequestThreadCreated: Codable, Sendable { + public let type: String + public let conversationId: String + public let requestId: String + public let callSessionId: String + public let title: String + + public init(type: String, conversationId: String, requestId: String, callSessionId: String, title: String) { + self.type = type + self.conversationId = conversationId + self.requestId = requestId + self.callSessionId = callSessionId + self.title = title + } +} + public struct IPCGuardianVerificationRequest: Codable, Sendable { public let type: String public let action: String diff --git a/clients/shared/IPC/IPCMessages.swift b/clients/shared/IPC/IPCMessages.swift index a52a46b1799..e3e014a6bc2 100644 --- a/clients/shared/IPC/IPCMessages.swift +++ b/clients/shared/IPC/IPCMessages.swift @@ -2151,6 +2151,7 @@ public enum ServerMessage: Decodable, Sendable { case workItemApprovePermissionsResponse(IPCWorkItemApprovePermissionsResponse) case workItemCancelResponse(IPCWorkItemCancelResponse) case taskRunThreadCreated(IPCTaskRunThreadCreated) + case guardianRequestThreadCreated(IPCGuardianRequestThreadCreated) case openTasksWindow(OpenTasksWindowMessage) case subagentSpawned(IPCSubagentSpawned) case subagentStatusChanged(IPCSubagentStatusChanged) @@ -2499,6 +2500,9 @@ public enum ServerMessage: Decodable, Sendable { case "task_run_thread_created": let message = try IPCTaskRunThreadCreated(from: decoder) self = .taskRunThreadCreated(message) + case "guardian_request_thread_created": + let message = try IPCGuardianRequestThreadCreated(from: decoder) + self = .guardianRequestThreadCreated(message) case "open_tasks_window": let message = try OpenTasksWindowMessage(from: decoder) self = .openTasksWindow(message) From 90d9e087ae042610a32170b61baa4de4fca8e5d7 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:53:33 -0500 Subject: [PATCH 07/13] feat: cross-channel guardian answer resolution (#7535) When a guardian action request is 
dispatched to telegram/sms/mac channels during a voice call, replies on any of those channels are now intercepted, validated, and used to resume the call: - Channel inbound (telegram/sms): intercept guardian answers early in handleChannelInbound(), with identity verification, single/multi-delivery disambiguation via request codes, and first-writer-wins resolution - Mac thread: intercept in session-process processMessage() before the agent loop, routing the user message as a guardian answer - Guardian dispatch: create mac conversations server-side with getOrCreateConversation() and seed them with the question text - Store: add getPendingDeliveryByConversation() for mac channel routing Co-authored-by: Claude Opus 4.6 --- assistant/src/calls/guardian-dispatch.ts | 38 +++++-- assistant/src/daemon/session-process.ts | 47 ++++++++ assistant/src/memory/guardian-action-store.ts | 23 ++++ .../src/runtime/routes/channel-routes.ts | 107 ++++++++++++++++++ 4 files changed, 205 insertions(+), 10 deletions(-) diff --git a/assistant/src/calls/guardian-dispatch.ts b/assistant/src/calls/guardian-dispatch.ts index f286f7ec991..9f4e7cc0927 100644 --- a/assistant/src/calls/guardian-dispatch.ts +++ b/assistant/src/calls/guardian-dispatch.ts @@ -18,6 +18,8 @@ import { } from '../memory/guardian-action-store.js'; import { deliverChannelReply } from '../runtime/gateway-client.js'; import { getUserConsultationTimeoutMs } from './call-constants.js'; +import { getOrCreateConversation } from '../memory/conversation-key-store.js'; +import { addMessage } from '../memory/conversation-store.js'; import type { CallPendingQuestion } from './types.js'; import type { ServerMessage } from '../daemon/ipc-contract.js'; @@ -106,27 +108,43 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams): // Create delivery rows and dispatch for (const dest of destinations) { - const delivery = createGuardianActionDelivery({ - requestId: request.id, - destinationChannel: dest.channel, - 
destinationChatId: dest.chatId, - destinationExternalUserId: dest.externalUserId, - }); - if (dest.channel === 'mac') { - // Emit IPC event for the mac client + // Create a dedicated server-side conversation for the mac guardian thread + const macConvKey = `asst:${assistantId}:guardian:request:${request.id}`; + const { conversationId: macConversationId } = getOrCreateConversation(macConvKey); + + const delivery = createGuardianActionDelivery({ + requestId: request.id, + destinationChannel: 'mac', + destinationConversationId: macConversationId, + }); + + // Add the guardian question as the initial message in the thread + addMessage( + macConversationId, + 'assistant', + JSON.stringify(`Your assistant needs your input during a phone call.\n\nQuestion: ${request.questionText}\n\nReply to this message with your answer.`), + ); + + // Emit IPC event for the mac client with the server-created conversation if (broadcast) { broadcast({ type: 'guardian_request_thread_created', - conversationId, + conversationId: macConversationId, requestId: request.id, callSessionId, title: `Guardian question: ${pendingQuestion.questionText.slice(0, 80)}`, } as ServerMessage); } updateDeliveryStatus(delivery.id, 'sent'); - log.info({ deliveryId: delivery.id, channel: 'mac' }, 'Mac guardian delivery emitted'); + log.info({ deliveryId: delivery.id, channel: 'mac', macConversationId }, 'Mac guardian delivery emitted'); } else { + const delivery = createGuardianActionDelivery({ + requestId: request.id, + destinationChannel: dest.channel, + destinationChatId: dest.chatId, + destinationExternalUserId: dest.externalUserId, + }); // External channel — POST to gateway void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId); } diff --git a/assistant/src/daemon/session-process.ts b/assistant/src/daemon/session-process.ts index 489ffd5e254..1afea07a075 100644 --- a/assistant/src/daemon/session-process.ts +++ 
b/assistant/src/daemon/session-process.ts @@ -14,6 +14,12 @@ import type { QueueDrainReason } from './session-queue-manager.js'; import type { TraceEmitter } from './trace-emitter.js'; import { createUserMessage, createAssistantMessage } from '../agent/message-types.js'; import * as conversationStore from '../memory/conversation-store.js'; +import { + getPendingDeliveryByConversation, + getGuardianActionRequest, + resolveGuardianActionRequest, +} from '../memory/guardian-action-store.js'; +import { answerCall } from '../calls/call-domain.js'; import { resolveSlash, type SlashContext } from './session-slash.js'; import { getConfig } from '../config/loader.js'; import { getLogger } from '../util/logger.js'; @@ -223,6 +229,47 @@ export async function processMessage( session.currentActiveSurfaceId = activeSurfaceId; session.currentPage = currentPage; + // ── Guardian action answer interception (mac channel) ── + // If this conversation has a pending guardian action delivery, treat the + // user message as the guardian's answer instead of running the agent loop. 
+ const guardianDelivery = getPendingDeliveryByConversation(session.conversationId); + if (guardianDelivery) { + const guardianRequest = getGuardianActionRequest(guardianDelivery.requestId); + if (guardianRequest && guardianRequest.status === 'pending') { + const resolved = resolveGuardianActionRequest(guardianRequest.id, content, 'mac'); + const userMsg = createUserMessage(content, attachments); + const persisted = conversationStore.addMessage( + session.conversationId, + 'user', + JSON.stringify(userMsg.content), + ); + session.messages.push(userMsg); + + if (resolved) { + void answerCall({ callSessionId: guardianRequest.callSessionId, answer: content }); + const confirmMsg = createAssistantMessage('Your answer has been relayed to the call.'); + conversationStore.addMessage( + session.conversationId, + 'assistant', + JSON.stringify(confirmMsg.content), + ); + session.messages.push(confirmMsg); + onEvent({ type: 'assistant_text_delta', text: 'Your answer has been relayed to the call.' }); + } else { + const staleMsg = createAssistantMessage('This question has already been answered from another channel.'); + conversationStore.addMessage( + session.conversationId, + 'assistant', + JSON.stringify(staleMsg.content), + ); + session.messages.push(staleMsg); + onEvent({ type: 'assistant_text_delta', text: 'This question has already been answered from another channel.' 
}); + onEvent({ type: 'message_complete', sessionId: session.conversationId }); + return persisted.id; + } + } + // Resolve slash commands before persistence const slashResult = resolveSlash(content, buildSlashContext(session)); diff --git a/assistant/src/memory/guardian-action-store.ts b/assistant/src/memory/guardian-action-store.ts index ec655b8026f..2f993ef4143 100644 --- a/assistant/src/memory/guardian-action-store.ts +++ b/assistant/src/memory/guardian-action-store.ts @@ -338,6 +338,29 @@ export function getPendingDeliveriesByDestination( return rows.map((r) => rowToDelivery(r.delivery)); } +/** + * Look up a pending delivery by destination conversation ID (for mac channel routing). + * Matches a delivery in status 'sent' whose request is still 'pending'; returns + * the first match, or null when there is none. + */ +export function getPendingDeliveryByConversation(conversationId: string): GuardianActionDelivery | null { + const db = getDb(); + const row = db + .select({ delivery: guardianActionDeliveries }) + .from(guardianActionDeliveries) + .innerJoin( + guardianActionRequests, + eq(guardianActionDeliveries.requestId, guardianActionRequests.id), + ) + .where( + and( + eq(guardianActionDeliveries.destinationConversationId, conversationId), + eq(guardianActionDeliveries.status, 'sent'), + eq(guardianActionRequests.status, 'pending'), + ), + ) + .get(); + return row ? 
rowToDelivery(row.delivery) : null; +} + export function updateDeliveryStatus( deliveryId: string, status: GuardianActionDeliveryStatus, diff --git a/assistant/src/runtime/routes/channel-routes.ts b/assistant/src/runtime/routes/channel-routes.ts index 33f6e2005e3..373eb071b06 100644 --- a/assistant/src/runtime/routes/channel-routes.ts +++ b/assistant/src/runtime/routes/channel-routes.ts @@ -18,6 +18,12 @@ import { isGuardian, validateAndConsumeChallenge, } from '../channel-guardian-service.js'; +import { + getPendingDeliveriesByDestination, + getGuardianActionRequest, + resolveGuardianActionRequest, +} from '../../memory/guardian-action-store.js'; +import { answerCall } from '../../calls/call-domain.js'; import { createApprovalRequest, getPendingApprovalByGuardianChat, @@ -553,6 +559,107 @@ export async function handleChannelInbound( } } + // ── Guardian action answer interception ── + // Check if this inbound message is a reply to a cross-channel guardian + // action request (from a voice call). Must run before approval interception + // so guardian answers are not mistakenly routed into the approval flow. + if ( + !result.duplicate && + trimmedContent.length > 0 && + body.senderExternalUserId && + replyCallbackUrl + ) { + const pendingDeliveries = getPendingDeliveriesByDestination(assistantId, sourceChannel, externalChatId); + if (pendingDeliveries.length > 0) { + // Identity check: only the designated guardian can answer + const validDeliveries = pendingDeliveries.filter( + (d) => d.destinationExternalUserId === body.senderExternalUserId, + ); + + if (validDeliveries.length > 0) { + let matchedDelivery = validDeliveries.length === 1 ? 
validDeliveries[0] : null; + let answerText = trimmedContent; + + // Multiple pending deliveries: require request code prefix for disambiguation + if (validDeliveries.length > 1) { + for (const d of validDeliveries) { + const req = getGuardianActionRequest(d.requestId); + if (req && trimmedContent.toUpperCase().startsWith(req.requestCode)) { + matchedDelivery = d; + answerText = trimmedContent.slice(req.requestCode.length).trim(); + break; + } + } + + if (!matchedDelivery) { + // Send disambiguation message listing the request codes + const codes = validDeliveries + .map((d) => { + const req = getGuardianActionRequest(d.requestId); + return req ? req.requestCode : null; + }) + .filter(Boolean); + try { + await deliverChannelReply(replyCallbackUrl, { + chatId: externalChatId, + text: `You have multiple pending guardian questions. Please prefix your reply with the reference code (${codes.join(', ')}) to indicate which question you are answering.`, + assistantId, + }, bearerToken); + } catch (err) { + log.error({ err, externalChatId }, 'Failed to deliver guardian action disambiguation message'); + } + return Response.json({ + accepted: true, + duplicate: false, + eventId: result.eventId, + guardianAnswer: 'disambiguation_sent', + }); + } + } + + if (matchedDelivery) { + const request = getGuardianActionRequest(matchedDelivery.requestId); + if (request) { + const resolved = resolveGuardianActionRequest( + request.id, + answerText, + sourceChannel, + body.senderExternalUserId, + ); + + if (resolved) { + // Route the answer to the voice call + void answerCall({ callSessionId: request.callSessionId, answer: answerText }); + return Response.json({ + accepted: true, + duplicate: false, + eventId: result.eventId, + guardianAnswer: 'resolved', + }); + } else { + // Already answered from another channel + try { + await deliverChannelReply(replyCallbackUrl, { + chatId: externalChatId, + text: 'This question has already been answered from another channel.', + assistantId, + }, 
bearerToken); + } catch (err) { + log.error({ err, externalChatId }, 'Failed to deliver guardian action stale notice'); + } + return Response.json({ + accepted: true, + duplicate: false, + eventId: result.eventId, + guardianAnswer: 'stale', + }); + } + } + } + } + } + } + // ── Actor role resolution ── // Determine whether the sender is the guardian for this channel. // When a guardian binding exists, non-guardian actors get stricter From f4c6182f62b8070e9cf6e6bf53fcbd0e44d23b30 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 22:57:27 -0500 Subject: [PATCH 08/13] feat: guardian action expiry sweep, voice thread visibility, and voice settings card (#7536) Add periodic sweep (60s interval) for expired cross-channel guardian action requests. When a request expires: marks request+deliveries as expired, expires pending questions, and sends expiry notices to external channels and mac threads. Allow voice-channel threads to appear in the desktop thread list by updating the session filter in both ThreadSessionRestorer and ThreadManager to pass through sessions with sourceChannel == "voice". Add a Voice (Phone Calls) card to the Settings Connect tab showing Twilio credential and phone number readiness for voice calls. 
Co-authored-by: Claude Opus 4.6 --- assistant/src/calls/guardian-action-sweep.ts | 104 ++++++++++++++++++ assistant/src/memory/guardian-action-store.ts | 34 +++++- assistant/src/runtime/http-server.ts | 9 ++ .../Features/MainWindow/ThreadManager.swift | 2 +- .../MainWindow/ThreadSessionRestorer.swift | 2 +- .../Settings/SettingsConnectTab.swift | 63 ++++++++++- 6 files changed, 210 insertions(+), 4 deletions(-) create mode 100644 assistant/src/calls/guardian-action-sweep.ts diff --git a/assistant/src/calls/guardian-action-sweep.ts b/assistant/src/calls/guardian-action-sweep.ts new file mode 100644 index 00000000000..1e5a00063a0 --- /dev/null +++ b/assistant/src/calls/guardian-action-sweep.ts @@ -0,0 +1,104 @@ +/** + * Periodic sweep for expired guardian action requests. + * + * Runs on a 60-second interval. When a request has passed its expiresAt + * timestamp: + * 1. Expires the request and all its deliveries in the store + * 2. Expires the associated pending question so the call-side timeout fires + * 3. Sends expiry notices to external delivery destinations (telegram, sms) + * 4. Adds an expiry message to mac guardian thread conversations + */ + +import { getLogger } from '../util/logger.js'; +import { + getExpiredGuardianActionRequests, + expireGuardianActionRequest, + getDeliveriesByRequestId, +} from '../memory/guardian-action-store.js'; +import { expirePendingQuestions } from './call-store.js'; +import { deliverChannelReply } from '../runtime/gateway-client.js'; +import { addMessage } from '../memory/conversation-store.js'; + +const log = getLogger('guardian-action-sweep'); + +const SWEEP_INTERVAL_MS = 60_000; + +let sweepTimer: ReturnType | null = null; + +/** + * Sweep expired guardian action requests and clean up. 
+ */ +export function sweepExpiredGuardianActions( + gatewayBaseUrl: string, + bearerToken?: string, +): void { + const expired = getExpiredGuardianActionRequests(); + + for (const request of expired) { + // Capture deliveries before expiring (since expiry changes their status) + const deliveries = getDeliveriesByRequestId(request.id); + + // Expire the request and all deliveries + expireGuardianActionRequest(request.id); + + // Expire associated pending questions + expirePendingQuestions(request.callSessionId); + + log.info( + { requestId: request.id, callSessionId: request.callSessionId }, + 'Expired guardian action request', + ); + + // Send expiry notices to each delivery destination + for (const delivery of deliveries) { + if (delivery.status !== 'sent' && delivery.status !== 'pending') continue; + + if (delivery.destinationChannel === 'mac' && delivery.destinationConversationId) { + // Add expiry message to mac guardian thread + addMessage( + delivery.destinationConversationId, + 'assistant', + JSON.stringify('This guardian question has expired without a response.'), + ); + } else if (delivery.destinationChatId) { + // External channel — send expiry notice + const deliverUrl = `${gatewayBaseUrl}/deliver/${delivery.destinationChannel}`; + void (async () => { + try { + await deliverChannelReply(deliverUrl, { + chatId: delivery.destinationChatId!, + text: 'The guardian question has expired without a response. 
The call has moved on.', + assistantId: request.assistantId, + }, bearerToken); + } catch (err) { + log.error( + { err, deliveryId: delivery.id, channel: delivery.destinationChannel }, + 'Failed to deliver guardian action expiry notice', + ); + } + })(); + } + } + } +} + +export function startGuardianActionSweep( + gatewayBaseUrl: string, + bearerToken?: string, +): void { + if (sweepTimer) return; + sweepTimer = setInterval(() => { + try { + sweepExpiredGuardianActions(gatewayBaseUrl, bearerToken); + } catch (err) { + log.error({ err }, 'Guardian action sweep failed'); + } + }, SWEEP_INTERVAL_MS); +} + +export function stopGuardianActionSweep(): void { + if (sweepTimer) { + clearInterval(sweepTimer); + sweepTimer = null; + } +} diff --git a/assistant/src/memory/guardian-action-store.ts b/assistant/src/memory/guardian-action-store.ts index 2f993ef4143..aa335a6e884 100644 --- a/assistant/src/memory/guardian-action-store.ts +++ b/assistant/src/memory/guardian-action-store.ts @@ -7,7 +7,7 @@ * answer resolves the request and all other deliveries are marked answered. */ -import { and, eq } from 'drizzle-orm'; +import { and, eq, lt } from 'drizzle-orm'; import { v4 as uuid } from 'uuid'; import { getDb } from './db.js'; import { @@ -241,6 +241,38 @@ export function expireGuardianActionRequest(id: string): void { .run(); } +/** + * Get all pending guardian action requests that have expired. + */ +export function getExpiredGuardianActionRequests(): GuardianActionRequest[] { + const db = getDb(); + const now = Date.now(); + return db + .select() + .from(guardianActionRequests) + .where( + and( + eq(guardianActionRequests.status, 'pending'), + lt(guardianActionRequests.expiresAt, now), + ), + ) + .all() + .map(rowToRequest); +} + +/** + * Get all deliveries for a specific request. 
+ */ +export function getDeliveriesByRequestId(requestId: string): GuardianActionDelivery[] { + const db = getDb(); + return db + .select() + .from(guardianActionDeliveries) + .where(eq(guardianActionDeliveries.requestId, requestId)) + .all() + .map(rowToDelivery); +} + /** * Cancel a guardian action request and all its deliveries. */ diff --git a/assistant/src/runtime/http-server.ts b/assistant/src/runtime/http-server.ts index 3f99480638c..6285633bbe5 100644 --- a/assistant/src/runtime/http-server.ts +++ b/assistant/src/runtime/http-server.ts @@ -44,6 +44,10 @@ import { startGuardianExpirySweep, stopGuardianExpirySweep, } from './routes/channel-routes.js'; +import { + startGuardianActionSweep, + stopGuardianActionSweep, +} from '../calls/guardian-action-sweep.js'; import * as channelDeliveryStore from '../memory/channel-delivery-store.js'; import * as conversationStore from '../memory/conversation-store.js'; import * as externalConversationStore from '../memory/external-conversation-store.js'; @@ -453,6 +457,10 @@ export class RuntimeHttpServer { log.info('Guardian approval expiry sweep started'); } + // Start guardian action request expiry sweep (cross-channel voice guardian) + startGuardianActionSweep(getGatewayBaseUrl(), this.bearerToken); + log.info('Guardian action expiry sweep started'); + // Startup guard: log gateway-only mode warnings log.info('Running in gateway-only ingress mode. 
Direct webhook routes disabled.'); if (!isLoopbackHost(this.hostname)) { @@ -464,6 +472,7 @@ export class RuntimeHttpServer { async stop(): Promise { stopGuardianExpirySweep(); + stopGuardianActionSweep(); if (this.retrySweepTimer) { clearInterval(this.retrySweepTimer); this.retrySweepTimer = null; diff --git a/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift b/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift index 1feae8255f1..b2d3eba592f 100644 --- a/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift +++ b/clients/macos/vellum-assistant/Features/MainWindow/ThreadManager.swift @@ -327,7 +327,7 @@ final class ThreadManager: ObservableObject, ThreadRestorerDelegate { serverOffset += response.sessions.count let recentSessions = response.sessions.filter { - $0.threadType != "private" && $0.channelBinding?.sourceChannel == nil + $0.threadType != "private" && ($0.channelBinding?.sourceChannel == nil || $0.channelBinding?.sourceChannel == "voice") } for session in recentSessions { diff --git a/clients/macos/vellum-assistant/Features/MainWindow/ThreadSessionRestorer.swift b/clients/macos/vellum-assistant/Features/MainWindow/ThreadSessionRestorer.swift index 6f56c3f5dcf..c58280b8eea 100644 --- a/clients/macos/vellum-assistant/Features/MainWindow/ThreadSessionRestorer.swift +++ b/clients/macos/vellum-assistant/Features/MainWindow/ThreadSessionRestorer.swift @@ -119,7 +119,7 @@ final class ThreadSessionRestorer { // (e.g. Telegram). External channel-bound sessions belong to their own // lane and should not appear in the desktop conversation list. 
let recentSessions = response.sessions.filter { - $0.threadType != "private" && $0.channelBinding?.sourceChannel == nil + $0.threadType != "private" && ($0.channelBinding?.sourceChannel == nil || $0.channelBinding?.sourceChannel == "voice") } let defaultThreadIsEmpty = delegate.threads.count == 1 diff --git a/clients/macos/vellum-assistant/Features/Settings/SettingsConnectTab.swift b/clients/macos/vellum-assistant/Features/Settings/SettingsConnectTab.swift index 82c3d198264..09a6d10c998 100644 --- a/clients/macos/vellum-assistant/Features/Settings/SettingsConnectTab.swift +++ b/clients/macos/vellum-assistant/Features/Settings/SettingsConnectTab.swift @@ -272,13 +272,14 @@ struct SettingsConnectTab: View { Text("Channels") .font(VFont.sectionTitle) .foregroundColor(VColor.textPrimary) - Text("Telegram and SMS integrations") + Text("Telegram, SMS, and Voice integrations") .font(VFont.caption) .foregroundColor(VColor.textMuted) } telegramCard twilioCard + voiceCard } } @@ -495,6 +496,66 @@ struct SettingsConnectTab: View { .vCard(background: VColor.surfaceSubtle) } + // MARK: - Voice (Phone Calls) Card + + private var voiceCard: some View { + VStack(alignment: .leading, spacing: VSpacing.md) { + VStack(alignment: .leading, spacing: VSpacing.xs) { + HStack(spacing: VSpacing.xs) { + Image(systemName: "phone.fill") + .foregroundColor(VColor.textPrimary) + .font(.system(size: 12)) + Text("Voice (Phone Calls)") + .font(VFont.sectionTitle) + .foregroundColor(VColor.textPrimary) + } + Text("Receive and make phone calls via Twilio") + .font(VFont.caption) + .foregroundColor(VColor.textMuted) + } + + if store.twilioHasCredentials && store.twilioPhoneNumber != nil { + channelStatusRow( + label: "Status", + icon: "checkmark.circle.fill", + iconColor: VColor.success, + value: "Voice calls ready" + ) + channelStatusRow( + label: "Number", + icon: "phone.fill", + iconColor: VColor.success, + value: store.twilioPhoneNumber ?? 
"", + valueFont: VFont.mono + ) + } else if store.twilioHasCredentials { + channelStatusRow( + label: "Credentials", + icon: "checkmark.circle.fill", + iconColor: VColor.success, + value: "Configured" + ) + channelStatusRow( + label: "Number", + icon: "exclamationmark.triangle", + iconColor: VColor.warning, + value: "Assign a phone number in SMS settings above", + valueColor: VColor.textMuted + ) + } else { + channelStatusRow( + label: "Status", + icon: "exclamationmark.triangle", + iconColor: VColor.warning, + value: "Configure Twilio credentials in SMS settings above", + valueColor: VColor.textMuted + ) + } + } + .padding(VSpacing.lg) + .vCard(background: VColor.surfaceSubtle) + } + // MARK: - Twilio Credential Entry private var twilioCredentialEntry: some View { From 17f50c60e741dd2f0257318f056bacdcb7042268 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 23:02:20 -0500 Subject: [PATCH 09/13] docs: update SKILL.md and ARCHITECTURE.md for voice-cross-guardian M1-M7 changes (#7538) Reflect the cross-channel guardian architecture in documentation: - SKILL.md: add DTMF callee verification section, update answering questions to describe ASK_GUARDIAN cross-channel dispatch with first-response-wins semantics, note mid-call steering via desktop chat is no longer supported, add accepted regressions section - ARCHITECTURE.md: update outgoing calls intro to describe voice as first-class channel with per-call conversations, replace bridge-based Mermaid diagram flow with guardian dispatch flow, replace call-bridge key component with guardian-dispatch/guardian-action-store/guardian- action-sweep, replace Call Bridge section with Cross-Channel Guardian Consultation, add guardian_action_requests and guardian_action_deliveries SQLite tables, add guardian modules to Channel Guardian Security table Co-authored-by: Claude Opus 4.6 --- ARCHITECTURE.md | 76 +++++++++---------- .../bundled-skills/phone-calls/SKILL.md | 45 ++++++++--- 2 files changed, 68 
insertions(+), 53 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 2015f2d947f..07fa5646552 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -3797,6 +3797,9 @@ The `channelGuardianApprovalRequests` table tracks per-run approval state. Each | `assistant/src/memory/channel-guardian-store.ts` | CRUD for guardian bindings, verification challenges, and approval requests (all scoped by `assistantId`) | | `assistant/src/runtime/channel-guardian-service.ts` | Challenge creation/validation, guardian identity checks (`isGuardian()`, `getGuardianBinding()`) -- all accept `assistantId` | | `assistant/src/runtime/routes/channel-routes.ts` | Guardian verification intercept (`/guardian_verify` command), actor role resolution, approval routing to guardian, proactive expiry sweep (`sweepExpiredGuardianApprovals`, `startGuardianExpirySweep`) | +| `assistant/src/calls/guardian-dispatch.ts` | Cross-channel ASK_GUARDIAN dispatch: creates guardian_action_requests, fans out to mac/telegram/sms, manages deliveries | +| `assistant/src/calls/guardian-action-sweep.ts` | Periodic 60s sweep for expired guardian action requests; sends expiry notices to delivery channels | +| `assistant/src/memory/guardian-action-store.ts` | CRUD for guardian_action_requests and guardian_action_deliveries tables; first-writer-wins resolution via atomic status check | ### Telegram Credential Flow @@ -3856,14 +3859,13 @@ In multi-assistant mode, the operator must configure `GATEWAY_ASSISTANT_ROUTING_ ## Outgoing AI Phone Calls — Twilio ConversationRelay -The Calls subsystem enables the assistant to place outgoing phone calls on behalf of the user via Twilio's ConversationRelay protocol. The assistant uses an LLM-driven conversation loop to speak with the callee in real time. 
During a live call, user messages in the chat thread are automatically routed to the call: if the AI agent has a pending question, the message is treated as an answer; otherwise, it is treated as a mid-call steering instruction that guides the AI agent's behavior in real time. +The Calls subsystem enables the assistant to place outgoing phone calls on behalf of the user via Twilio's ConversationRelay protocol. The assistant uses an LLM-driven conversation loop to speak with the callee in real time. Voice is a first-class channel with its own per-call conversation (key pattern: `asst:${assistantId}:voice:call:${callSessionId}`). When the AI needs guardian input during a call, it dispatches ASK_GUARDIAN requests cross-channel to mac/telegram/sms via the guardian dispatch engine. Answer resolution uses first-writer-wins semantics -- the first channel to respond provides the answer, and remaining channels receive an "already answered" notice. ### Call Flow ```mermaid sequenceDiagram participant User as User (Chat UI) - participant Bridge as CallBridge participant Session as Session / Tool Executor participant CallStore as CallStore (SQLite) participant TwilioProvider as TwilioProvider @@ -3874,6 +3876,10 @@ sequenceDiagram participant Orch as CallOrchestrator participant LLM as Anthropic Claude participant State as CallState (Notifiers) + participant GuardianDispatch as GuardianDispatch + participant Mac as Mac Desktop + participant TG/SMS as Telegram / SMS + participant CallDomain as CallDomain User->>Session: call_start tool Session->>CallStore: createCallSession() @@ -3907,12 +3913,13 @@ sequenceDiagram alt ASK_GUARDIAN pattern detected Orch->>CallStore: createPendingQuestion() - Orch->>State: fireCallQuestionNotifier() - State->>Session: question callback - Session->>User: display question in chat thread - User->>Bridge: next message in thread - Bridge->>Orch: handleUserAnswer() - Bridge->>CallStore: answerPendingQuestion() + Orch->>GuardianDispatch:
dispatchGuardianQuestion() + GuardianDispatch->>Mac: guardian_request_thread_created IPC + GuardianDispatch->>TG/SMS: POST /deliver/{channel} + Note over Mac,TG/SMS: First channel to respond wins + Mac/TG/SMS->>Routes: guardian answer + Routes->>CallDomain: answerCall() + CallDomain->>Orch: handleUserAnswer() Orch->>LLM: continue with [USER_ANSWERED: ...] end @@ -3934,7 +3941,9 @@ sequenceDiagram |------|------| | `assistant/src/calls/call-store.ts` | CRUD operations for call sessions, call events, and pending questions in SQLite via Drizzle ORM | | `assistant/src/calls/call-domain.ts` | Shared domain functions (`startCall`, `getCallStatus`, `cancelCall`, `answerCall`, `relayInstruction`) used by both tools and HTTP routes | -| `assistant/src/calls/call-bridge.ts` | Answer-or-instruction bridge: intercepts user chat messages before the agent loop and routes them as answers (when a pending question exists) or as mid-call steering instructions (when no question is pending) | +| `assistant/src/calls/guardian-dispatch.ts` | Cross-channel dispatch engine: fans out ASK_GUARDIAN questions to mac/telegram/sms, creates server-side guardian conversations, manages deliveries | +| `assistant/src/memory/guardian-action-store.ts` | CRUD for guardian action requests and deliveries; first-writer-wins resolution via atomic status check | +| `assistant/src/calls/guardian-action-sweep.ts` | Periodic 60s sweep for expired guardian action requests; sends expiry notices to all delivery channels | | `assistant/src/calls/call-state-machine.ts` | Deterministic state transition validator with allowed-transition table and terminal-state enforcement | | `assistant/src/calls/call-recovery.ts` | Startup reconciliation of non-terminal calls: fetches provider status and transitions stale sessions | | `assistant/src/calls/twilio-provider.ts` | Twilio Voice REST API integration (initiateCall, endCall, getCallStatus) using direct fetch — no Twilio SDK dependency | @@ -3977,50 +3986,29 @@ initiated 
──> ringing ──> in_progress ──> waiting_on_user ──> in The `validateTransition(current, next)` function is called by `updateCallSession()` in the call store. Same-state transitions (no-ops) are always valid. Invalid transitions are rejected with an explanatory reason string. -### Call Bridge — Answer-or-Instruction Routing +### Cross-Channel Guardian Consultation -The call bridge (`call-bridge.ts`) intercepts user chat messages during a live call and routes them to the call orchestrator — either as answers to pending questions or as mid-call steering instructions. This enables users to both respond to questions from the callee and proactively steer the conversation without leaving the chat thread. +When the LLM emits `[ASK_GUARDIAN: question]` during a voice call, the orchestrator creates a pending question and calls `dispatchGuardianQuestion()` on the guardian dispatch engine. The dispatch engine handles the full cross-channel fan-out: -The bridge function `tryRouteCallMessage()` applies the following decision logic: +1. **Request creation**: A `guardian_action_request` row is created with a unique 6-character hex request code, the question text, a `pending` status, and an expiry timestamp. -1. **Find active call**: Look up a non-terminal call session for the conversation. If none exists, return `{ handled: false, reason: 'no_active_call' }`. -2. **Pending question → answer path** (priority): If the active call has a pending question, the message is routed as an answer via `handleUserAnswer()` on the orchestrator, which injects `[USER_ANSWERED: answer]` into the LLM context and resumes the conversation with the callee. -3. **No pending question → instruction path**: If no question is pending, the message is routed as a steering instruction via `relayInstruction()` in `call-domain.ts`, which calls `handleUserInstruction()` on the orchestrator. The orchestrator injects `[USER_INSTRUCTION: text]` into its conversation history as high-priority steering input. +2. 
**Delivery fan-out**: Deliveries are created for each configured channel: + - **Mac (always)**: A server-side conversation is created with key `asst:${assistantId}:guardian:request:${request.id}`. The dispatch engine emits a `guardian_request_thread_created` IPC event so the desktop UI can display the question thread. + - **Telegram/SMS (if guardian binding exists)**: A `POST /deliver/{channel}` request is sent to the gateway with the question text and request code. -#### Answer path detail +3. **Answer resolution**: The first channel to respond wins. Answer resolution uses an atomic `WHERE status = 'pending'` check on the `guardian_action_requests` table -- only the first writer succeeds in transitioning the request to `answered` status. The winning answer text and responding channel are recorded on the request row. -1. **Question emission**: When the LLM emits `[ASK_GUARDIAN: question]`, the orchestrator creates a pending question in SQLite, fires the question notifier, and transitions to `waiting_on_user` state. -2. **In-thread display**: The Session's registered question notifier callback persists an assistant message in the conversation thread (via `conversationStore.addMessage()`) and emits `assistant_text_delta` + `message_complete` events to connected clients. -3. **Auto-consumption**: `tryRouteCallMessage()` is checked before the agent loop in two entrypoints: - - **HTTP path**: `DaemonServer.processMessage()` / `persistAndProcessMessage()` in the daemon server. - - **IPC/session path**: `processMessage()` (direct) and `routeOrProcess()` (queued) in `session-process.ts`. - In both paths, if the bridge consumes the message the agent loop is skipped. Any `userFacingText` returned by the bridge is emitted as `assistant_text_delta` + `message_complete` so the chat UI shows a live acknowledgement or failure notice. -4. 
**Orchestrator resume**: The orchestrator receives the answer via `handleUserAnswer()`, injects `[USER_ANSWERED: answer]` into the LLM context, and resumes the conversation with the callee. +4. **Stale responses**: Channels that lose the race (respond after another channel has already answered) receive an "already answered" notice informing them that the question was resolved by another channel. -#### Instruction path detail +5. **Request-code disambiguation**: When a guardian has multiple pending requests across concurrent calls, they prefix their answer with the 6-character hex request code to indicate which question they are answering. This allows unambiguous routing even when questions arrive on the same channel in quick succession. -When no pending question exists but a call is active, the user's message is treated as a steering instruction: +6. **Expiry sweep**: The `guardian-action-sweep.ts` module runs a periodic 60-second interval sweep. It finds requests that have passed their expiry timestamp and transitions them to `expired` status. Expiry notices are sent to all delivery channels associated with the expired request. -1. The bridge calls `relayInstruction()` which validates the call is active and delegates to `handleUserInstruction()` on the orchestrator. -2. The orchestrator appends `[USER_INSTRUCTION: text]` to its conversation history. The system prompt tells the model to treat this marker as high-priority steering input. -3. A confirmation message ("Instruction relayed to active call.") is persisted in the conversation thread. -4. **Failure handling**: If `relayInstruction()` fails (no active orchestrator, terminal session, etc.), a failure notice ("Failed to relay instruction to the active call.") is persisted and the message is still consumed (`handled: true`) so the caller does not fall through to the normal agent loop. The bridge returns `reason: 'instruction_relay_failed'` so callers can distinguish success from failure.
- -The instruction path is also available via HTTP at `POST /v1/calls/:callSessionId/instruction` for programmatic access (see Runtime HTTP Endpoints). - -#### Bridge result reasons - -The bridge returns a `{ handled: boolean; reason?: string; userFacingText?: string }` result so callers can determine whether the message was consumed: -- `no_active_call` — no non-terminal call session exists for this conversation -- `no_pending_question` — call is active but no question is pending (only returned in legacy answer-only path; current bridge falls through to instruction) -- `orchestrator_not_found` — the orchestrator was destroyed (call ended between question and answer) -- `orchestrator_not_waiting` — the orchestrator is not in `waiting_on_user` state -- `orchestrator_rejected` — the orchestrator's `handleUserAnswer()` returned false -- `instruction_relay_failed` — the instruction could not be relayed to the orchestrator; message is still consumed (`handled: true`) with a failure notice persisted in-thread +7. **Separation from channel guardian approvals**: Guardian action requests are SEPARATE from `channelGuardianApprovalRequests` (the existing channel tool-approval system). The channel guardian approval system handles tool-use permission grants (approve/deny a specific tool invocation). Guardian action requests handle free-form questions from voice calls that require human input to continue the conversation. ### SQLite Tables -All three tables live in `~/.vellum/workspace/data/db/assistant.db` alongside existing tables: +All five tables live in `~/.vellum/workspace/data/db/assistant.db` alongside existing tables: - **`call_sessions`** — One row per outgoing call. Tracks conversation association, provider info (Twilio CallSid), phone numbers, task description, status lifecycle (`initiated` -> `ringing` -> `in_progress` -> `waiting_on_user` -> `completed`/`failed`), and timestamps. Foreign key to `conversations(id)` with cascade delete. 
@@ -4028,6 +4016,10 @@ All three tables live in `~/.vellum/workspace/data/db/assistant.db` alongside ex - **`call_pending_questions`** — Tracks questions the AI asks the user during a call (via the `[ASK_GUARDIAN: ...]` pattern). Status lifecycle: `pending` -> `answered`/`expired`/`cancelled`. Foreign key to `call_sessions(id)` with cascade delete. +- **`guardian_action_requests`** — Cross-channel guardian consultation requests. One row per ASK_GUARDIAN question from a voice call. Tracks question text, request code (6-char hex), status lifecycle (`pending` -> `answered`/`expired`/`cancelled`), answer text, which channel answered, and expiry timestamp. + +- **`guardian_action_deliveries`** — Per-channel delivery tracking for guardian action requests. One row per (request, channel) pair. Tracks delivery status (`pending` -> `sent` -> `answered`/`expired`/`cancelled`), destination conversation/chat IDs, and response timestamps. + ### Gateway Twilio Webhook Ingress Internet-facing Twilio callbacks terminate at the gateway, which validates signatures before forwarding to the runtime. This keeps the runtime behind the gateway's bearer-auth boundary. diff --git a/assistant/src/config/bundled-skills/phone-calls/SKILL.md b/assistant/src/config/bundled-skills/phone-calls/SKILL.md index a9c44056ffa..a1193fbb153 100644 --- a/assistant/src/config/bundled-skills/phone-calls/SKILL.md +++ b/assistant/src/config/bundled-skills/phone-calls/SKILL.md @@ -181,6 +181,25 @@ credential_store action=store service=twilio field=user_phone_number value=+1415 | `calls.callerIdentity.allowPerCallOverride` | Whether per-call mode selection is allowed | `true` | | `calls.callerIdentity.userNumber` | Optional E.164 phone number for user-number mode (alternative to storing via `credential_store`) | *(empty)* | +## DTMF Callee Verification + +An optional verification step where the callee must enter a numeric code via their phone's keypad (DTMF tones) before the call proceeds. 
This ensures the intended person has answered the phone. + +### How it works + +1. When the call connects and DTMF verification is enabled, a random numeric code is generated (length configured by `calls.verification.codeLength`). +2. The verification code is shared with the guardian in the initiating conversation so they know what code was issued. +3. The AI voice agent speaks the code digit-by-digit to the callee and asks them to enter it on their keypad. +4. The callee enters the code via DTMF (phone keypad tones). +5. If the code matches, the call proceeds normally. If the code is incorrect, the agent may re-prompt or end the call depending on configuration. + +### Configuration + +| Setting | Description | Default | +|---|---|---| +| `calls.verification.enabled` | Enable DTMF callee verification | `false` | +| `calls.verification.codeLength` | Number of digits in the verification code | `4` | + ## Optional: Higher Quality Voice with ElevenLabs ElevenLabs integration is entirely optional. The standard Twilio-only setup works unchanged — this section is only relevant if you want to improve voice quality. @@ -347,18 +366,12 @@ During an active call, the user can interact with the AI voice agent via the HTT #### Answering questions -When the AI voice agent encounters something it needs user input for, a **pending question** appears in the voice thread. The call status changes to `waiting_on_user`. +When the AI voice agent encounters something it needs user input for, it dispatches an **ASK_GUARDIAN** request to all configured guardian channels (mac desktop, Telegram, SMS). The call status changes to `waiting_on_user`. -1. A **pending question** appears in `call_status` output -2. Present the question prominently to the user: - -``` -❓ The person on the call asked something the assistant needs your help with: - "They're asking if you'd prefer the smoking or non-smoking section?" -``` - -3. 
Use the `call_answer` tool or the HTTP API (`POST /v1/calls/:id/answer`) to relay the answer to the AI voice agent -4. The AI voice agent receives the answer and continues the conversation naturally +1. The question is delivered simultaneously to every configured channel. The first channel to respond wins (first-response-wins semantics) -- once one channel provides an answer, the other channels receive an "already answered" notice. +2. On the mac desktop, a guardian request thread is created with the question. On Telegram/SMS, the question text and a request code are delivered via the gateway. +3. If DTMF callee verification is enabled, the callee must enter a verification code before the call proceeds (see the **DTMF Callee Verification** section above). +4. The guardian provides an answer through whichever channel they prefer. The answer is routed to the AI voice agent, which continues the conversation naturally. **Important:** Respond to pending questions quickly. There is a consultation timeout (default: 2 minutes). If no answer is provided in time, the AI voice agent will move on. @@ -373,6 +386,8 @@ When there is **no pending question** but the call is still active, the user can The instruction is injected into the AI voice agent's conversation context as high-priority input, and the agent adjusts its behavior accordingly. +**Note:** Mid-call steering via the desktop chat thread is no longer supported. The desktop thread only receives pointer/status messages about the call. To steer a call, use the HTTP API endpoints directly. 
+ ### Call status values - **initiated** — Call is being placed @@ -468,6 +483,14 @@ vellum config set calls.disclosure.text "Just so you know, this is an assistant vellum config set calls.userConsultTimeoutSeconds 300 ``` +## Accepted Regressions + +The following behavioral changes were introduced with the cross-channel guardian architecture (voice-cross-guardian): + +- **No more mid-call steering via desktop chat.** The call bridge that routed desktop chat messages to the active call has been removed. The desktop chat thread only receives pointer/status messages about the call. To steer a call, use the HTTP API endpoints directly (`POST /v1/calls/:id/instruction`). +- **No live transcript mirror in the initiating chat.** The initiating desktop conversation no longer receives a real-time mirror of the call transcript. The initiating chat only gets pointer/status messages (call started, call ended, question asked, etc.). +- **Guardian questions are dispatched cross-channel.** Rather than appearing only in the initiating desktop thread, ASK_GUARDIAN questions are now dispatched to all configured guardian channels (mac desktop, Telegram, SMS) simultaneously. The first channel to respond wins. + ## Troubleshooting ### "Twilio credentials not configured" From 829d0cd7d79e585bded490d9f1d84114cf5cd766 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 23:18:05 -0500 Subject: [PATCH 10/13] fix: address voice-cross-guardian review feedback (5 issues) (#7550) 1. Pass broadcast/assistantId to CallOrchestrator from RelayConnection via module-level setRelayBroadcast wired in lifecycle.ts, so mac desktop receives guardian_request_thread_created IPC events and multi-assistant deployments use the correct assistant ID. 2. Thread bearer token through guardian dispatch deliverToExternalChannel so gateway /deliver/{channel} calls include Authorization header. 3. 
Swap resolve/answerCall ordering in channel-routes guardian answer interception: call answerCall first, resolve only on success, so failed answers leave the request pending for retry. 4. Use content block array format for addMessage calls in guardian-dispatch.ts and guardian-action-sweep.ts to match codebase convention (JSON.stringify([{type:'text',text:'...'}])). 5. Expire deliveries in 'sent' status (not just 'pending') in expireGuardianActionRequest using inArray. Co-authored-by: Claude Opus 4.6 --- assistant/src/calls/guardian-action-sweep.ts | 2 +- assistant/src/calls/guardian-dispatch.ts | 8 ++++-- assistant/src/calls/relay-server.ts | 13 ++++++++- assistant/src/daemon/lifecycle.ts | 2 ++ assistant/src/memory/guardian-action-store.ts | 4 +-- .../src/runtime/routes/channel-routes.ts | 28 +++++++++++++++++-- 6 files changed, 48 insertions(+), 9 deletions(-) diff --git a/assistant/src/calls/guardian-action-sweep.ts b/assistant/src/calls/guardian-action-sweep.ts index 1e5a00063a0..59109446731 100644 --- a/assistant/src/calls/guardian-action-sweep.ts +++ b/assistant/src/calls/guardian-action-sweep.ts @@ -58,7 +58,7 @@ export function sweepExpiredGuardianActions( addMessage( delivery.destinationConversationId, 'assistant', - JSON.stringify('This guardian question has expired without a response.'), + JSON.stringify([{ type: 'text', text: 'This guardian question has expired without a response.' 
}]), ); } else if (delivery.destinationChatId) { // External channel — send expiry notice diff --git a/assistant/src/calls/guardian-dispatch.ts b/assistant/src/calls/guardian-dispatch.ts index 9f4e7cc0927..21414806509 100644 --- a/assistant/src/calls/guardian-dispatch.ts +++ b/assistant/src/calls/guardian-dispatch.ts @@ -21,6 +21,7 @@ import { getUserConsultationTimeoutMs } from './call-constants.js'; import { getOrCreateConversation } from '../memory/conversation-key-store.js'; import { addMessage } from '../memory/conversation-store.js'; import type { CallPendingQuestion } from './types.js'; +import { readHttpToken } from '../util/platform.js'; import type { ServerMessage } from '../daemon/ipc-contract.js'; const log = getLogger('guardian-dispatch'); @@ -123,7 +124,7 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams): addMessage( macConversationId, 'assistant', - JSON.stringify(`Your assistant needs your input during a phone call.\n\nQuestion: ${request.questionText}\n\nReply to this message with your answer.`), + JSON.stringify([{ type: 'text', text: `Your assistant needs your input during a phone call.\n\nQuestion: ${request.questionText}\n\nReply to this message with your answer.` }]), ); // Emit IPC event for the mac client with the server-created conversation @@ -146,7 +147,7 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams): destinationExternalUserId: dest.externalUserId, }); // External channel — POST to gateway - void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId); + void deliverToExternalChannel(delivery.id, dest.channel, dest.chatId!, request.questionText, request.requestCode, assistantId, readHttpToken() ?? 
undefined); } } } catch (err) { @@ -161,6 +162,7 @@ async function deliverToExternalChannel( questionText: string, requestCode: string, assistantId: string, + bearerToken?: string, ): Promise { const gatewayBase = getGatewayBaseUrl(); const deliverUrl = `${gatewayBase}/deliver/${channel}`; @@ -178,7 +180,7 @@ async function deliverToExternalChannel( chatId, text: messageText, assistantId, - }); + }, bearerToken); updateDeliveryStatus(deliveryId, 'sent'); log.info({ deliveryId, channel, chatId }, 'External guardian delivery sent'); } catch (err) { diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index e56c0d564a3..7708c3991ff 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -109,6 +109,14 @@ export interface RelayWebSocketData { /** Active relay connections keyed by callSessionId. */ export const activeRelayConnections = new Map(); +/** Module-level broadcast function, set by the HTTP server during startup. */ +let globalBroadcast: ((msg: import('../daemon/ipc-contract.js').ServerMessage) => void) | undefined; + +/** Register a broadcast function so RelayConnection can forward IPC events. */ +export function setRelayBroadcast(fn: (msg: import('../daemon/ipc-contract.js').ServerMessage) => void): void { + globalBroadcast = fn; +} + // ── RelayConnection ────────────────────────────────────────────────── /** @@ -336,7 +344,10 @@ export class RelayConnection { }); // Create and attach the LLM-driven orchestrator - const orchestrator = new CallOrchestrator(this.callSessionId, this, session?.task ?? null); + const orchestrator = new CallOrchestrator(this.callSessionId, this, session?.task ?? null, { + broadcast: globalBroadcast, + assistantId: session?.assistantId ?? 
'self', + }); this.setOrchestrator(orchestrator); // Check if callee verification is enabled diff --git a/assistant/src/daemon/lifecycle.ts b/assistant/src/daemon/lifecycle.ts index e7f5505c285..e4d94a61779 100644 --- a/assistant/src/daemon/lifecycle.ts +++ b/assistant/src/daemon/lifecycle.ts @@ -24,6 +24,7 @@ import { loadConfig } from '../config/loader.js'; import { ensurePromptFiles } from '../config/system-prompt.js'; import { loadPrebuiltHtml } from '../home-base/prebuilt/seed.js'; import { DaemonServer } from './server.js'; +import { setRelayBroadcast } from '../calls/relay-server.js'; import { listWorkItems, updateWorkItem } from '../work-items/work-item-store.js'; import { getLogger, initLogger } from '../util/logger.js'; import { DaemonError } from '../util/errors.js'; @@ -467,6 +468,7 @@ export async function runDaemon(): Promise { try { log.info({ port, hostname }, 'Daemon startup: starting runtime HTTP server'); await runtimeHttp.start(); + setRelayBroadcast((msg) => server.broadcast(msg)); server.setHttpPort(port); log.info({ port, hostname }, 'Daemon startup: runtime HTTP server listening'); } catch (err) { diff --git a/assistant/src/memory/guardian-action-store.ts b/assistant/src/memory/guardian-action-store.ts index aa335a6e884..23cfe0d9510 100644 --- a/assistant/src/memory/guardian-action-store.ts +++ b/assistant/src/memory/guardian-action-store.ts @@ -7,7 +7,7 @@ * answer resolves the request and all other deliveries are marked answered. 
*/ -import { and, eq, lt } from 'drizzle-orm'; +import { and, eq, lt, inArray } from 'drizzle-orm'; import { v4 as uuid } from 'uuid'; import { getDb } from './db.js'; import { @@ -235,7 +235,7 @@ export function expireGuardianActionRequest(id: string): void { .where( and( eq(guardianActionDeliveries.requestId, id), - eq(guardianActionDeliveries.status, 'pending'), + inArray(guardianActionDeliveries.status, ['pending', 'sent']), ), ) .run(); diff --git a/assistant/src/runtime/routes/channel-routes.ts b/assistant/src/runtime/routes/channel-routes.ts index 7982593d19c..ceab9c06cd0 100644 --- a/assistant/src/runtime/routes/channel-routes.ts +++ b/assistant/src/runtime/routes/channel-routes.ts @@ -632,6 +632,32 @@ export async function handleChannelInbound( if (matchedDelivery) { const request = getGuardianActionRequest(matchedDelivery.requestId); if (request) { + // Attempt to deliver the answer to the call first. Only resolve + // the guardian action request if answerCall succeeds, so that a + // failed delivery (e.g. pending question timed out) leaves the + // request pending for retry from another channel. + const answerResult = await answerCall({ callSessionId: request.callSessionId, answer: answerText }); + + if (!('ok' in answerResult) || !answerResult.ok) { + const errorMsg = 'error' in answerResult ? answerResult.error : 'Unknown error'; + log.warn({ callSessionId: request.callSessionId, error: errorMsg }, 'answerCall failed for guardian answer'); + try { + await deliverChannelReply(replyCallbackUrl, { + chatId: externalChatId, + text: 'Failed to deliver your answer to the call. 
Please try again.', + assistantId, + }, bearerToken); + } catch (deliverErr) { + log.error({ err: deliverErr, externalChatId }, 'Failed to deliver guardian answer failure notice'); + } + return Response.json({ + accepted: true, + duplicate: false, + eventId: result.eventId, + guardianAnswer: 'answer_failed', + }); + } + const resolved = resolveGuardianActionRequest( request.id, answerText, @@ -640,8 +666,6 @@ export async function handleChannelInbound( ); if (resolved) { - // Route the answer to the voice call - void answerCall({ callSessionId: request.callSessionId, answer: answerText }); return Response.json({ accepted: true, duplicate: false, From eb63e8d73cd4b8b91fc98935bf3fb52d8a140554 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 23:28:19 -0500 Subject: [PATCH 11/13] fix: address round-2 voice-cross-guardian review feedback (#7552) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Fix mac channel guardian-answer ordering: call answerCall before resolveGuardianActionRequest so failed delivery leaves request pending for retry from another channel (mirrors channel-routes.ts). 2. Persist voice transcripts directly to conversation_store alongside notifier fires so transcript history survives without a live daemon Session listening on the voice thread. 3. Fix SKILL.md codeLength default documentation (4 → 6) to match the actual schema default. 
Co-authored-by: Claude Opus 4.6 --- assistant/src/calls/call-orchestrator.ts | 8 +++++ assistant/src/calls/relay-server.ts | 7 ++++ .../bundled-skills/phone-calls/SKILL.md | 2 +- assistant/src/daemon/session-process.ts | 32 ++++++++++++------- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/assistant/src/calls/call-orchestrator.ts b/assistant/src/calls/call-orchestrator.ts index d8e459c6daf..953842ba84d 100644 --- a/assistant/src/calls/call-orchestrator.ts +++ b/assistant/src/calls/call-orchestrator.ts @@ -22,6 +22,7 @@ import type { RelayConnection } from './relay-server.js'; import { registerCallOrchestrator, unregisterCallOrchestrator, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js'; import type { PromptSpeakerContext } from './speaker-identification.js'; import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; +import * as conversationStore from '../memory/conversation-store.js'; import { dispatchGuardianQuestion } from './guardian-dispatch.js'; import type { ServerMessage } from '../daemon/ipc-contract.js'; @@ -452,6 +453,13 @@ export class CallOrchestrator { if (spokenText.length > 0) { const session = getCallSession(this.callSessionId); if (session) { + // Persist assistant transcript to the voice conversation so it + // survives even when no live daemon Session is listening. 
+ conversationStore.addMessage( + session.conversationId, + 'assistant', + JSON.stringify([{ type: 'text', text: spokenText }]), + ); fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'assistant', spokenText); } } diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index 7708c3991ff..3cd8046daa3 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -454,6 +454,13 @@ export class RelayConnection { const session = getCallSession(this.callSessionId); if (session) { + // Persist caller transcript to the voice conversation so it survives + // even when no live daemon Session is listening. + conversationStore.addMessage( + session.conversationId, + 'user', + JSON.stringify([{ type: 'text', text: msg.voicePrompt }]), + ); fireCallTranscriptNotifier(session.conversationId, this.callSessionId, 'caller', msg.voicePrompt); } diff --git a/assistant/src/config/bundled-skills/phone-calls/SKILL.md b/assistant/src/config/bundled-skills/phone-calls/SKILL.md index a1193fbb153..a85206e2414 100644 --- a/assistant/src/config/bundled-skills/phone-calls/SKILL.md +++ b/assistant/src/config/bundled-skills/phone-calls/SKILL.md @@ -198,7 +198,7 @@ An optional verification step where the callee must enter a numeric code via the | Setting | Description | Default | |---|---|---| | `calls.verification.enabled` | Enable DTMF callee verification | `false` | -| `calls.verification.codeLength` | Number of digits in the verification code | `4` | +| `calls.verification.codeLength` | Number of digits in the verification code | `6` | ## Optional: Higher Quality Voice with ElevenLabs diff --git a/assistant/src/daemon/session-process.ts b/assistant/src/daemon/session-process.ts index 1afea07a075..4878fece246 100644 --- a/assistant/src/daemon/session-process.ts +++ b/assistant/src/daemon/session-process.ts @@ -236,7 +236,6 @@ export async function processMessage( if (guardianDelivery) { const guardianRequest = 
getGuardianActionRequest(guardianDelivery.requestId); if (guardianRequest && guardianRequest.status === 'pending') { - const resolved = resolveGuardianActionRequest(guardianRequest.id, content, 'mac'); const userMsg = createUserMessage(content, attachments); const persisted = conversationStore.addMessage( session.conversationId, @@ -245,25 +244,36 @@ export async function processMessage( ); session.messages.push(userMsg); - if (resolved) { - void answerCall({ callSessionId: guardianRequest.callSessionId, answer: content }); - const confirmMsg = createAssistantMessage('Your answer has been relayed to the call.'); + // Attempt to deliver the answer to the call first. Only resolve + // the guardian action request if answerCall succeeds, so that a + // failed delivery leaves the request pending for retry from + // another channel. + const answerResult = await answerCall({ callSessionId: guardianRequest.callSessionId, answer: content }); + + if ('ok' in answerResult && answerResult.ok) { + const resolved = resolveGuardianActionRequest(guardianRequest.id, content, 'mac'); + const replyText = resolved + ? 'Your answer has been relayed to the call.' + : 'This question has already been answered from another channel.'; + const replyMsg = createAssistantMessage(replyText); conversationStore.addMessage( session.conversationId, 'assistant', - JSON.stringify(confirmMsg.content), + JSON.stringify(replyMsg.content), ); - session.messages.push(confirmMsg); - onEvent({ type: 'assistant_text_delta', text: 'Your answer has been relayed to the call.' }); + session.messages.push(replyMsg); + onEvent({ type: 'assistant_text_delta', text: replyText }); } else { - const staleMsg = createAssistantMessage('This question has already been answered from another channel.'); + const errorDetail = 'error' in answerResult ? 
answerResult.error : 'Unknown error'; + log.warn({ callSessionId: guardianRequest.callSessionId, error: errorDetail }, 'answerCall failed for mac guardian answer'); + const failMsg = createAssistantMessage('Failed to deliver your answer to the call. Please try again.'); conversationStore.addMessage( session.conversationId, 'assistant', - JSON.stringify(staleMsg.content), + JSON.stringify(failMsg.content), ); - session.messages.push(staleMsg); - onEvent({ type: 'assistant_text_delta', text: 'This question has already been answered from another channel.' }); + session.messages.push(failMsg); + onEvent({ type: 'assistant_text_delta', text: 'Failed to deliver your answer to the call. Please try again.' }); } onEvent({ type: 'message_complete', sessionId: session.conversationId }); return persisted.id; From 3f159af8ffb89b56bccf4247fc81e5bbc1032304 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 23:39:16 -0500 Subject: [PATCH 12/13] fix: make voice transcript/completion persistence session-independent --- .../src/calls/call-conversation-messages.ts | 27 +++++++++++++++++++ assistant/src/calls/call-orchestrator.ts | 3 +++ assistant/src/calls/relay-server.ts | 2 ++ assistant/src/calls/twilio-routes.ts | 2 ++ assistant/src/daemon/session-notifiers.ts | 27 +++---------------- 5 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 assistant/src/calls/call-conversation-messages.ts diff --git a/assistant/src/calls/call-conversation-messages.ts b/assistant/src/calls/call-conversation-messages.ts new file mode 100644 index 00000000000..b6dabd9c8e6 --- /dev/null +++ b/assistant/src/calls/call-conversation-messages.ts @@ -0,0 +1,27 @@ +/** + * Persistence + formatting helpers for messages that belong in the + * dedicated voice conversation thread. 
+ */ + +import * as conversationStore from '../memory/conversation-store.js'; +import { getCallEvents, getCallSession } from './call-store.js'; + +export function buildCallCompletionMessage(callSessionId: string): string { + const callSession = getCallSession(callSessionId); + const events = getCallEvents(callSessionId); + const duration = callSession?.endedAt && callSession?.startedAt + ? Math.round((callSession.endedAt - callSession.startedAt) / 1000) + : null; + const durationStr = duration !== null ? ` (${duration}s)` : ''; + return `**Call completed**${durationStr}. ${events.length} event(s) recorded.`; +} + +export function persistCallCompletionMessage(conversationId: string, callSessionId: string): string { + const summaryText = buildCallCompletionMessage(callSessionId); + conversationStore.addMessage( + conversationId, + 'assistant', + JSON.stringify([{ type: 'text', text: summaryText }]), + ); + return summaryText; +} diff --git a/assistant/src/calls/call-orchestrator.ts b/assistant/src/calls/call-orchestrator.ts index 953842ba84d..f7fe11ab8e7 100644 --- a/assistant/src/calls/call-orchestrator.ts +++ b/assistant/src/calls/call-orchestrator.ts @@ -22,6 +22,7 @@ import type { RelayConnection } from './relay-server.js'; import { registerCallOrchestrator, unregisterCallOrchestrator, fireCallQuestionNotifier, fireCallCompletionNotifier, fireCallTranscriptNotifier } from './call-state.js'; import type { PromptSpeakerContext } from './speaker-identification.js'; import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; +import { persistCallCompletionMessage } from './call-conversation-messages.js'; import * as conversationStore from '../memory/conversation-store.js'; import { dispatchGuardianQuestion } from './guardian-dispatch.js'; import type { ServerMessage } from '../daemon/ipc-contract.js'; @@ -518,6 +519,7 @@ export class CallOrchestrator { // Notify the voice conversation if (shouldNotifyCompletion && currentSession) { + 
persistCallCompletionMessage(currentSession.conversationId, this.callSessionId); fireCallCompletionNotifier(currentSession.conversationId, this.callSessionId); } @@ -635,6 +637,7 @@ export class CallOrchestrator { updateCallSession(this.callSessionId, { status: 'completed', endedAt: Date.now() }); recordCallEvent(this.callSessionId, 'call_ended', { reason: 'max_duration' }); if (shouldNotifyCompletion && currentSession) { + persistCallCompletionMessage(currentSession.conversationId, this.callSessionId); fireCallCompletionNotifier(currentSession.conversationId, this.callSessionId); } diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index 3cd8046daa3..83891e4264d 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -19,6 +19,7 @@ import { import { CallOrchestrator } from './call-orchestrator.js'; import { fireCallTranscriptNotifier, fireCallCompletionNotifier } from './call-state.js'; import { addPointerMessage, formatDuration } from './call-pointer-messages.js'; +import { persistCallCompletionMessage } from './call-conversation-messages.js'; import * as conversationStore from '../memory/conversation-store.js'; import { extractPromptSpeakerMetadata, @@ -310,6 +311,7 @@ export class RelayConnection { } expirePendingQuestions(this.callSessionId); + persistCallCompletionMessage(session.conversationId, this.callSessionId); fireCallCompletionNotifier(session.conversationId, this.callSessionId); } diff --git a/assistant/src/calls/twilio-routes.ts b/assistant/src/calls/twilio-routes.ts index 9b581460474..ecbd2b10454 100644 --- a/assistant/src/calls/twilio-routes.ts +++ b/assistant/src/calls/twilio-routes.ts @@ -25,6 +25,7 @@ import { getTwilioConfig } from './twilio-config.js'; import { loadConfig } from '../config/loader.js'; import { getTwilioRelayUrl } from '../inbound/public-ingress-urls.js'; import { fireCallCompletionNotifier } from './call-state.js'; +import { persistCallCompletionMessage 
} from './call-conversation-messages.js'; import { resolveVoiceQualityProfile, isVoiceProfileValid } from './voice-quality.js'; const log = getLogger('twilio-routes'); @@ -283,6 +284,7 @@ export async function handleStatusCallback(req: Request): Promise { expirePendingQuestions(session.id); if (!wasTerminal) { + persistCallCompletionMessage(session.conversationId, session.id); fireCallCompletionNotifier(session.conversationId, session.id); } } diff --git a/assistant/src/daemon/session-notifiers.ts b/assistant/src/daemon/session-notifiers.ts index 59af01bf1e3..294a1d915ec 100644 --- a/assistant/src/daemon/session-notifiers.ts +++ b/assistant/src/daemon/session-notifiers.ts @@ -30,7 +30,8 @@ import { registerCallCompletionNotifier, unregisterCallCompletionNotifier, } from '../calls/call-state.js'; -import { getCallSession, getCallEvents } from '../calls/call-store.js'; +import { getCallSession } from '../calls/call-store.js'; +import { buildCallCompletionMessage } from '../calls/call-conversation-messages.js'; /** * Subset of Session state that notifier callbacks need to read at @@ -122,14 +123,6 @@ export function registerSessionNotifiers( const speakerLabel = speaker === 'caller' ? 'Caller' : 'Assistant'; const transcriptText = `**Live call transcript**\n${speakerLabel}: ${text}`; - conversationStore.addMessage( - conversationId, - 'assistant', - JSON.stringify([{ type: 'text', text: transcriptText }]), - ); - - ctx.messages.push(createAssistantMessage(transcriptText)); - ctx.sendToClient({ type: 'assistant_text_delta', text: transcriptText, @@ -143,21 +136,7 @@ export function registerSessionNotifiers( ); registerCallCompletionNotifier(conversationId, (callSessionId: string) => { - const callSession = getCallSession(callSessionId); - const events = getCallEvents(callSessionId); - const duration = callSession?.endedAt && callSession?.startedAt - ? Math.round((callSession.endedAt - callSession.startedAt) / 1000) - : null; - const durationStr = duration !== null ? 
` (${duration}s)` : ''; - const summaryText = `**Call completed**${durationStr}. ${events.length} event(s) recorded.`; - - conversationStore.addMessage( - conversationId, - 'assistant', - JSON.stringify([{ type: 'text', text: summaryText }]), - ); - - ctx.messages.push(createAssistantMessage(summaryText)); + const summaryText = buildCallCompletionMessage(callSessionId); ctx.sendToClient({ type: 'assistant_text_delta', From ec8c8b0ed011e7052eaa62d13c14c69eeaf4d440 Mon Sep 17 00:00:00 2001 From: Noa Flaherty Date: Mon, 23 Feb 2026 23:58:36 -0500 Subject: [PATCH 13/13] fix voice call transcript handling and close review gaps --- .../call-conversation-messages.test.ts | 130 ++++++++++++++++++ .../src/__tests__/call-orchestrator.test.ts | 54 ++++++++ .../__tests__/guardian-action-store.test.ts | 123 +++++++++++++++++ assistant/src/__tests__/relay-server.test.ts | 105 ++++++++++++-- .../src/calls/call-conversation-messages.ts | 7 +- assistant/src/calls/call-orchestrator.ts | 19 ++- assistant/src/calls/relay-server.ts | 30 ++-- assistant/src/memory/guardian-action-store.ts | 2 +- 8 files changed, 442 insertions(+), 28 deletions(-) create mode 100644 assistant/src/__tests__/call-conversation-messages.test.ts create mode 100644 assistant/src/__tests__/guardian-action-store.test.ts diff --git a/assistant/src/__tests__/call-conversation-messages.test.ts b/assistant/src/__tests__/call-conversation-messages.test.ts new file mode 100644 index 00000000000..1a413c29f8a --- /dev/null +++ b/assistant/src/__tests__/call-conversation-messages.test.ts @@ -0,0 +1,130 @@ +import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +const testDir = mkdtempSync(join(tmpdir(), 'call-conversation-messages-test-')); + +mock.module('../util/platform.js', () => ({ + getDataDir: () => testDir, + isMacOS: () => process.platform === 'darwin', + isLinux: () => 
process.platform === 'linux', + isWindows: () => process.platform === 'win32', + getSocketPath: () => join(testDir, 'test.sock'), + getPidPath: () => join(testDir, 'test.pid'), + getDbPath: () => join(testDir, 'test.db'), + getLogPath: () => join(testDir, 'test.log'), + ensureDataDir: () => {}, +})); + +mock.module('../util/logger.js', () => ({ + getLogger: () => + new Proxy({} as Record, { + get: () => () => {}, + }), +})); + +import { initializeDb, getDb, resetDb } from '../memory/db.js'; +import { conversations } from '../memory/schema.js'; +import { createCallSession, updateCallSession, recordCallEvent } from '../calls/call-store.js'; +import { getMessages } from '../memory/conversation-store.js'; +import { buildCallCompletionMessage, persistCallCompletionMessage } from '../calls/call-conversation-messages.js'; + +initializeDb(); + +function ensureConversation(id: string): void { + const db = getDb(); + const now = Date.now(); + db.insert(conversations).values({ + id, + title: `Conversation ${id}`, + createdAt: now, + updatedAt: now, + }).run(); +} + +function resetTables(): void { + const db = getDb(); + db.run('DELETE FROM call_events'); + db.run('DELETE FROM call_pending_questions'); + db.run('DELETE FROM call_sessions'); + db.run('DELETE FROM messages'); + db.run('DELETE FROM conversations'); +} + +function getLatestAssistantText(conversationId: string): string { + const rows = getMessages(conversationId).filter((m) => m.role === 'assistant'); + expect(rows.length).toBeGreaterThan(0); + const latest = rows[rows.length - 1]; + const parsed = JSON.parse(latest.content) as Array<{ type: string; text?: string }>; + return parsed.filter((b) => b.type === 'text').map((b) => b.text ?? 
'').join(''); +} + +describe('call-conversation-messages', () => { + beforeEach(() => { + resetTables(); + }); + + afterAll(() => { + resetDb(); + try { + rmSync(testDir, { recursive: true }); + } catch { + // best-effort cleanup + } + }); + + test('buildCallCompletionMessage labels failed calls correctly', () => { + const conversationId = 'conv-call-msg-failed'; + ensureConversation(conversationId); + const session = createCallSession({ + conversationId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + + updateCallSession(session.id, { status: 'in_progress', startedAt: 1_000 }); + updateCallSession(session.id, { status: 'failed', endedAt: 6_000 }); + recordCallEvent(session.id, 'call_connected'); + recordCallEvent(session.id, 'call_failed'); + + expect(buildCallCompletionMessage(session.id)).toBe('**Call failed** (5s). 2 event(s) recorded.'); + }); + + test('buildCallCompletionMessage labels cancelled calls correctly', () => { + const conversationId = 'conv-call-msg-cancelled'; + ensureConversation(conversationId); + const session = createCallSession({ + conversationId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + + updateCallSession(session.id, { status: 'in_progress', startedAt: 1_000 }); + updateCallSession(session.id, { status: 'cancelled', endedAt: 4_000 }); + recordCallEvent(session.id, 'call_connected'); + recordCallEvent(session.id, 'call_ended'); + + expect(buildCallCompletionMessage(session.id)).toBe('**Call cancelled** (3s). 
2 event(s) recorded.'); + }); + + test('persistCallCompletionMessage keeps completed label when status is completed', () => { + const conversationId = 'conv-call-msg-completed'; + ensureConversation(conversationId); + const session = createCallSession({ + conversationId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + + updateCallSession(session.id, { status: 'completed' }); + recordCallEvent(session.id, 'call_ended'); + + const summary = persistCallCompletionMessage(conversationId, session.id); + expect(summary).toBe('**Call completed**. 1 event(s) recorded.'); + expect(getLatestAssistantText(conversationId)).toBe('**Call completed**. 1 event(s) recorded.'); + }); +}); diff --git a/assistant/src/__tests__/call-orchestrator.test.ts b/assistant/src/__tests__/call-orchestrator.test.ts index ffbdb4e0bb1..9bb0d838d1e 100644 --- a/assistant/src/__tests__/call-orchestrator.test.ts +++ b/assistant/src/__tests__/call-orchestrator.test.ts @@ -572,6 +572,60 @@ describe('call-orchestrator', () => { orchestrator.destroy(); }); + test('barge-in cleanup never sends empty user turns to Anthropic', async () => { + let callCount = 0; + mockStreamFn.mockImplementation((...args: unknown[]) => { + callCount++; + + // Initial outbound opener + if (callCount === 1) { + return createMockStream(['Hey Noa, this is Credence calling.']); + } + + // First caller turn enters an in-flight LLM run that gets interrupted + if (callCount === 2) { + const emitter = new EventEmitter(); + const options = args[1] as { signal?: AbortSignal } | undefined; + return { + on: (event: string, handler: (...evtArgs: unknown[]) => void) => { + emitter.on(event, handler); + return { on: () => ({ on: () => ({}) }) }; + }, + finalMessage: () => + new Promise((_, reject) => { + options?.signal?.addEventListener('abort', () => { + const err = new Error('aborted'); + err.name = 'AbortError'; + reject(err); + }, { once: true }); + }), + }; + } + + // Second caller turn should 
never include an empty user message. + const firstArg = args[0] as { messages: Array<{ role: string; content: string }> }; + const userMessages = firstArg.messages.filter((m) => m.role === 'user'); + expect(userMessages.length).toBeGreaterThan(0); + expect(userMessages.every((m) => m.content.trim().length > 0)).toBe(true); + return createMockStream(['Got it, thanks for clarifying.']); + }); + + const { relay, orchestrator } = setupOrchestrator('Quick check-in'); + await orchestrator.startInitialGreeting(); + + const firstTurnPromise = orchestrator.handleCallerUtterance('Hello?'); + await new Promise((r) => setTimeout(r, 5)); + const secondTurnPromise = orchestrator.handleCallerUtterance('What have you been up to lately?'); + + await Promise.all([firstTurnPromise, secondTurnPromise]); + + const allTokens = relay.sentTokens.map((t) => t.token).join(''); + expect(allTokens).toContain('Got it, thanks for clarifying.'); + expect(allTokens).not.toContain('technical issue'); + + orchestrator.destroy(); + }); + test('rapid caller barge-in coalesces contiguous user turns for role alternation', async () => { let callCount = 0; mockStreamFn.mockImplementation((...args: unknown[]) => { diff --git a/assistant/src/__tests__/guardian-action-store.test.ts b/assistant/src/__tests__/guardian-action-store.test.ts new file mode 100644 index 00000000000..1cfc35f9081 --- /dev/null +++ b/assistant/src/__tests__/guardian-action-store.test.ts @@ -0,0 +1,123 @@ +import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +const testDir = mkdtempSync(join(tmpdir(), 'guardian-action-store-test-')); + +mock.module('../util/platform.js', () => ({ + getDataDir: () => testDir, + isMacOS: () => process.platform === 'darwin', + isLinux: () => process.platform === 'linux', + isWindows: () => process.platform === 'win32', + getSocketPath: () => join(testDir, 
'test.sock'), + getPidPath: () => join(testDir, 'test.pid'), + getDbPath: () => join(testDir, 'test.db'), + getLogPath: () => join(testDir, 'test.log'), + ensureDataDir: () => {}, +})); + +mock.module('../util/logger.js', () => ({ + getLogger: () => + new Proxy({} as Record, { + get: () => () => {}, + }), +})); + +import { initializeDb, getDb, resetDb } from '../memory/db.js'; +import { conversations } from '../memory/schema.js'; +import { createCallSession, createPendingQuestion } from '../calls/call-store.js'; +import { + createGuardianActionRequest, + createGuardianActionDelivery, + updateDeliveryStatus, + cancelGuardianActionRequest, + getGuardianActionRequest, + getDeliveriesByRequestId, +} from '../memory/guardian-action-store.js'; + +initializeDb(); + +function ensureConversation(id: string): void { + const db = getDb(); + const now = Date.now(); + db.insert(conversations).values({ + id, + title: `Conversation ${id}`, + createdAt: now, + updatedAt: now, + }).run(); +} + +function resetTables(): void { + const db = getDb(); + db.run('DELETE FROM guardian_action_deliveries'); + db.run('DELETE FROM guardian_action_requests'); + db.run('DELETE FROM call_pending_questions'); + db.run('DELETE FROM call_events'); + db.run('DELETE FROM call_sessions'); + db.run('DELETE FROM conversations'); +} + +describe('guardian-action-store', () => { + beforeEach(() => { + resetTables(); + }); + + afterAll(() => { + resetDb(); + try { + rmSync(testDir, { recursive: true }); + } catch { + // best-effort cleanup + } + }); + + test('cancelGuardianActionRequest cancels both pending and sent deliveries', () => { + const conversationId = 'conv-guardian-cancel'; + ensureConversation(conversationId); + + const session = createCallSession({ + conversationId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + const pendingQuestion = createPendingQuestion(session.id, 'What is our gate code?'); + + const request = createGuardianActionRequest({ + kind: 
'ask_guardian', + sourceChannel: 'voice', + sourceConversationId: conversationId, + callSessionId: session.id, + pendingQuestionId: pendingQuestion.id, + questionText: pendingQuestion.questionText, + expiresAt: Date.now() + 60_000, + }); + + const pendingDelivery = createGuardianActionDelivery({ + requestId: request.id, + destinationChannel: 'mac', + destinationConversationId: 'conv-mac-guardian', + }); + const sentDelivery = createGuardianActionDelivery({ + requestId: request.id, + destinationChannel: 'telegram', + destinationChatId: 'chat-guardian', + destinationExternalUserId: 'guardian-user', + }); + updateDeliveryStatus(sentDelivery.id, 'sent'); + + cancelGuardianActionRequest(request.id); + + const updatedRequest = getGuardianActionRequest(request.id); + expect(updatedRequest).not.toBeNull(); + expect(updatedRequest!.status).toBe('cancelled'); + + const deliveries = getDeliveriesByRequestId(request.id); + const pendingAfter = deliveries.find((d) => d.id === pendingDelivery.id); + const sentAfter = deliveries.find((d) => d.id === sentDelivery.id); + expect(pendingAfter?.status).toBe('cancelled'); + expect(sentAfter?.status).toBe('cancelled'); + }); +}); diff --git a/assistant/src/__tests__/relay-server.test.ts b/assistant/src/__tests__/relay-server.test.ts index 820cb4dc4dd..0c66e682706 100644 --- a/assistant/src/__tests__/relay-server.test.ts +++ b/assistant/src/__tests__/relay-server.test.ts @@ -44,18 +44,25 @@ mock.module('../util/logger.js', () => ({ // ── Config mock ───────────────────────────────────────────────────── -mock.module('../config/loader.js', () => ({ - getConfig: () => ({ - apiKeys: { anthropic: 'test-key' }, - calls: { - enabled: true, - provider: 'twilio', - maxDurationSeconds: 3600, - userConsultTimeoutSeconds: 120, - disclosure: { enabled: false, text: '' }, - safety: { denyCategories: [] }, +const mockConfig = { + apiKeys: { anthropic: 'test-key' }, + calls: { + enabled: true, + provider: 'twilio', + maxDurationSeconds: 3600, + 
userConsultTimeoutSeconds: 120, + disclosure: { enabled: false, text: '' }, + safety: { denyCategories: [] }, + verification: { + enabled: false, + maxAttempts: 3, + codeLength: 6, }, - }), + }, +}; + +mock.module('../config/loader.js', () => ({ + getConfig: () => mockConfig, })); // ── Anthropic SDK mock ────────────────────────────────────────────── @@ -104,6 +111,7 @@ import { getCallSession, getCallEvents, } from '../calls/call-store.js'; +import { getMessages } from '../memory/conversation-store.js'; import { registerCallCompletionNotifier, unregisterCallCompletionNotifier } from '../calls/call-state.js'; import { RelayConnection, activeRelayConnections } from '../calls/relay-server.js'; import type { RelayWebSocketData } from '../calls/relay-server.js'; @@ -166,11 +174,34 @@ function resetTables() { ensuredConvIds = new Set(); } +function getLatestAssistantText(conversationId: string): string | null { + const messages = getMessages(conversationId).filter((m) => m.role === 'assistant'); + if (messages.length === 0) return null; + const latest = messages[messages.length - 1]; + try { + const parsed = JSON.parse(latest.content) as unknown; + if (Array.isArray(parsed)) { + return parsed + .filter((block): block is { type: string; text?: string } => typeof block === 'object' && block !== null) + .filter((block) => block.type === 'text') + .map((block) => block.text ?? '') + .join(''); + } + if (typeof parsed === 'string') return parsed; + } catch { + // Ignore parse failures and fall back to raw content. 
+ } + return latest.content; +} + describe('relay-server', () => { beforeEach(() => { resetTables(); activeRelayConnections.clear(); mockStreamFn.mockImplementation(() => createMockStream(['Hello'])); + mockConfig.calls.verification.enabled = false; + mockConfig.calls.verification.maxAttempts = 3; + mockConfig.calls.verification.codeLength = 6; }); // ── Setup message handling ────────────────────────────────────── @@ -270,6 +301,7 @@ describe('relay-server', () => { const endedEvents = getCallEvents(session.id).filter((e) => e.eventType === 'call_ended'); expect(endedEvents.length).toBe(1); expect(completionCount).toBe(1); + expect(getLatestAssistantText('conv-relay-close-normal')).toContain('**Call completed**'); unregisterCallCompletionNotifier('conv-relay-close-normal'); relay.destroy(); @@ -294,6 +326,7 @@ describe('relay-server', () => { expect(updated!.lastError).toContain('abnormal closure'); const failEvents = getCallEvents(session.id).filter((e) => e.eventType === 'call_failed'); expect(failEvents.length).toBe(1); + expect(getLatestAssistantText('conv-relay-close-abnormal')).toContain('**Call failed**'); relay.destroy(); }); @@ -506,6 +539,56 @@ describe('relay-server', () => { relay.destroy(); }); + test('verification failure remains failed if transport closes during goodbye delay', async () => { + ensureConversation('conv-relay-verify-race'); + const session = createCallSession({ + conversationId: 'conv-relay-verify-race', + provider: 'twilio', + fromNumber: '+15551111111', + toNumber: '+15552222222', + }); + + mockConfig.calls.verification.enabled = true; + mockConfig.calls.verification.maxAttempts = 1; + mockConfig.calls.verification.codeLength = 1; + + const { relay } = createMockWs(session.id); + + await relay.handleMessage(JSON.stringify({ + type: 'setup', + callSid: 'CA_verify_race_123', + from: '+15551111111', + to: '+15552222222', + })); + + const verificationCode = relay.getVerificationCode(); + expect(verificationCode).not.toBeNull(); + const 
wrongDigit = verificationCode === '0' ? '1' : '0'; + + await relay.handleMessage(JSON.stringify({ + type: 'dtmf', + digit: wrongDigit, + })); + + // Simulate the callee hanging up before the delayed endSession executes. + relay.handleTransportClosed(1000, 'callee hung up'); + + const updated = getCallSession(session.id); + expect(updated).not.toBeNull(); + expect(updated!.status).toBe('failed'); + expect(updated!.lastError).toContain('max attempts exceeded'); + expect(getLatestAssistantText('conv-relay-verify-race')).toContain('**Call failed**'); + + // Let the delayed endSession callback flush to avoid timer bleed across tests. + await new Promise((resolve) => setTimeout(resolve, 2100)); + + const finalState = getCallSession(session.id); + expect(finalState).not.toBeNull(); + expect(finalState!.status).toBe('failed'); + + relay.destroy(); + }); + // ── Error handling ────────────────────────────────────────────── test('handleMessage: error message records call_failed event', async () => { diff --git a/assistant/src/calls/call-conversation-messages.ts b/assistant/src/calls/call-conversation-messages.ts index b6dabd9c8e6..03b89afb110 100644 --- a/assistant/src/calls/call-conversation-messages.ts +++ b/assistant/src/calls/call-conversation-messages.ts @@ -13,7 +13,12 @@ export function buildCallCompletionMessage(callSessionId: string): string { ? Math.round((callSession.endedAt - callSession.startedAt) / 1000) : null; const durationStr = duration !== null ? ` (${duration}s)` : ''; - return `**Call completed**${durationStr}. ${events.length} event(s) recorded.`; + const statusLabel = callSession?.status === 'failed' + ? 'Call failed' + : callSession?.status === 'cancelled' + ? 'Call cancelled' + : 'Call completed'; + return `**${statusLabel}**${durationStr}. 
${events.length} event(s) recorded.`; } export function persistCallCompletionMessage(conversationId: string, callSessionId: string): string { diff --git a/assistant/src/calls/call-orchestrator.ts b/assistant/src/calls/call-orchestrator.ts index f7fe11ab8e7..c5225ce9e1c 100644 --- a/assistant/src/calls/call-orchestrator.ts +++ b/assistant/src/calls/call-orchestrator.ts @@ -129,9 +129,17 @@ export class CallOrchestrator { // the caller's transcript to the synthetic "[CALL_OPENING]" message, // causing the model to re-run opener behavior instead of responding // directly to the caller. - for (const entry of this.conversationHistory) { - if (entry.content.includes(CALL_OPENING_MARKER)) { - entry.content = entry.content.replace(CALL_OPENING_MARKER_REGEX, '').trim(); + // If the marker-only seed message becomes empty, remove it entirely: + // Anthropic rejects any user turn with empty content. + for (let i = 0; i < this.conversationHistory.length; i++) { + const entry = this.conversationHistory[i]; + if (!entry.content.includes(CALL_OPENING_MARKER)) continue; + const stripped = entry.content.replace(CALL_OPENING_MARKER_REGEX, '').trim(); + if (stripped.length === 0) { + this.conversationHistory.splice(i, 1); + i--; + } else { + entry.content = stripped; } } } @@ -155,7 +163,10 @@ export class CallOrchestrator { // this utterance into that same user turn. const lastMessage = this.conversationHistory[this.conversationHistory.length - 1]; if (lastMessage?.role === 'user') { - lastMessage.content = `${lastMessage.content}\n${callerTurnContent}`; + const existingContent = lastMessage.content.trim(); + lastMessage.content = existingContent.length > 0 + ? 
`${lastMessage.content}\n${callerTurnContent}` + : callerTurnContent; } else { this.conversationHistory.push({ role: 'user', diff --git a/assistant/src/calls/relay-server.ts b/assistant/src/calls/relay-server.ts index 83891e4264d..e57e3a59f50 100644 --- a/assistant/src/calls/relay-server.ts +++ b/assistant/src/calls/relay-server.ts @@ -537,21 +537,29 @@ export class RelayConnection { this.sendTextToken('Verification failed. Goodbye.', true); - // End the call with failed status after TTS plays - setTimeout(() => { - this.endSession('Verification failed'); - updateCallSession(this.callSessionId, { - status: 'failed', - endedAt: Date.now(), - lastError: 'Callee verification failed — max attempts exceeded', - }); - - const session = getCallSession(this.callSessionId); - if (session?.initiatedFromConversationId) { + // Mark failed immediately so a relay close during the goodbye TTS + // window cannot race this into a terminal "completed" status. + updateCallSession(this.callSessionId, { + status: 'failed', + endedAt: Date.now(), + lastError: 'Callee verification failed — max attempts exceeded', + }); + + const session = getCallSession(this.callSessionId); + if (session) { + expirePendingQuestions(this.callSessionId); + persistCallCompletionMessage(session.conversationId, this.callSessionId); + fireCallCompletionNotifier(session.conversationId, this.callSessionId); + if (session.initiatedFromConversationId) { addPointerMessage(session.initiatedFromConversationId, 'failed', session.toNumber, { reason: 'Callee verification failed', }); } + } + + // End the call with failed status after TTS plays + setTimeout(() => { + this.endSession('Verification failed'); }, 2000); } else { // Allow another attempt diff --git a/assistant/src/memory/guardian-action-store.ts b/assistant/src/memory/guardian-action-store.ts index 23cfe0d9510..9dc5ea3902f 100644 --- a/assistant/src/memory/guardian-action-store.ts +++ b/assistant/src/memory/guardian-action-store.ts @@ -295,7 +295,7 @@ export 
function cancelGuardianActionRequest(id: string): void { .where( and( eq(guardianActionDeliveries.requestId, id), - eq(guardianActionDeliveries.status, 'pending'), + inArray(guardianActionDeliveries.status, ['pending', 'sent']), ), ) .run();