diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 840b199f30e..4971d0ce411 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -4277,6 +4277,14 @@ When the LLM emits `[ASK_GUARDIAN: question]` during a voice call, the orchestra 7. **Separation from channel guardian approvals**: Guardian action requests are SEPARATE from `channelGuardianApprovalRequests` (the existing channel tool-approval system). The channel guardian approval system handles tool-use permission grants (approve/deny a specific tool invocation). Guardian action requests handle free-form questions from voice calls that require human input to continue the conversation. +#### Guardian Request Copy Generation Pipeline + +Thread titles and initial messages for guardian question threads are generated via `guardian-question-copy.ts`. The module calls the configured LLM provider (with `modelIntent: 'latency-optimized'`) to produce an emoji-prefixed, attention-oriented title and a richer initial message that explains the live-call context. A 5-second timeout guards the generation call. When no provider is configured, the provider call fails or times out, or the response cannot be parsed, the module falls back to deterministic copy (`buildFallbackCopy`) that uses a warning emoji prefix followed by the first 70 characters of the question as the title, and a simple template containing the full question text as the initial message. The generative copy is awaited only in the macOS delivery branch so that Telegram/SMS deliveries dispatch without LLM latency. + +#### macOS Notification + Deep-Link Flow + +When a guardian question is dispatched while the macOS app is not the active application (`NSApp.isActive` is false), the Swift client posts a native `UNUserNotificationCenter` notification under the `GUARDIAN_REQUEST` category instead of selecting the thread and showing the main window immediately. The notification title mirrors the emoji-prefixed thread title from the copy generation pipeline, and the body carries the first 200 characters of the question text. Tapping the notification triggers the `openConversationThread` deep-link handler, which switches the main window to the guardian question thread so the user can reply immediately. 
+ ### SQLite Tables All five tables live in `~/.vellum/workspace/data/db/assistant.db` alongside existing tables: diff --git a/assistant/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap b/assistant/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap index 565417ca6d1..31e73f75ba1 100644 --- a/assistant/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +++ b/assistant/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap @@ -2614,6 +2614,7 @@ exports[`IPC message snapshots ServerMessage types guardian_request_thread_creat { "callSessionId": "call-001", "conversationId": "conv-guardian-001", + "questionText": "What is the gate code?", "requestId": "req-guardian-001", "title": "Guardian action request", "type": "guardian_request_thread_created", diff --git a/assistant/src/__tests__/guardian-dispatch.test.ts b/assistant/src/__tests__/guardian-dispatch.test.ts index 37c4957dc7f..dcf8fdc4a59 100644 --- a/assistant/src/__tests__/guardian-dispatch.test.ts +++ b/assistant/src/__tests__/guardian-dispatch.test.ts @@ -56,6 +56,34 @@ mock.module('../runtime/gateway-client.js', () => ({ }, })); +// Mock guardian-question-copy to return deterministic values without hitting a real provider. +// Only generateGuardianCopy (the async LLM call) is mocked; buildFallbackCopy is the real +// implementation passed through so guardian-dispatch can use it if needed. +let mockGuardianCopy = { + threadTitle: '\u{1F6A8} Caller needs the gate code', + initialMessage: 'Your assistant needs your input during a live phone call.\n\nQuestion: What is the gate code?\n\nReply to this message with your answer.', +}; + +mock.module('../calls/guardian-question-copy.js', () => ({ + generateGuardianCopy: async (questionText: string) => ({ + threadTitle: mockGuardianCopy.threadTitle, + initialMessage: mockGuardianCopy.initialMessage.includes(questionText) + ? 
mockGuardianCopy.initialMessage + : mockGuardianCopy.initialMessage.replace(/Question: .*/, `Question: ${questionText}`), + }), + // Pass through the real buildFallbackCopy implementation (tested in guardian-question-copy.test.ts) + buildFallbackCopy: (questionText: string) => ({ + threadTitle: `\u26A0\uFE0F ${questionText.slice(0, 70)}`, + initialMessage: [ + 'Your assistant needs your input during a phone call.', + '', + `Question: ${questionText}`, + '', + 'Reply to this message with your answer.', + ].join('\n'), + }), +})); + import { initializeDb, getDb, resetDb } from '../memory/db.js'; import { conversations } from '../memory/schema.js'; import { createCallSession, createPendingQuestion } from '../calls/call-store.js'; @@ -87,6 +115,10 @@ function resetTables(): void { mockTelegramBinding = null; mockSmsBinding = null; deliveredMessages.length = 0; + mockGuardianCopy = { + threadTitle: '\u{1F6A8} Caller needs the gate code', + initialMessage: 'Your assistant needs your input during a live phone call.\n\nQuestion: What is the gate code?\n\nReply to this message with your answer.', + }; } describe('guardian-dispatch', () => { @@ -250,4 +282,108 @@ describe('guardian-dispatch', () => { pendingQuestion: pq, })).resolves.toBeUndefined(); }); + + test('broadcast title is emoji-prefixed and does not start with "Guardian question:"', async () => { + const convId = 'conv-dispatch-6'; + ensureConversation(convId); + + const session = createCallSession({ + conversationId: convId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + const pq = createPendingQuestion(session.id, 'What is the gate code?'); + + const broadcastedMessages: unknown[] = []; + const broadcastFn = (msg: unknown) => { broadcastedMessages.push(msg); }; + + await dispatchGuardianQuestion({ + callSessionId: session.id, + conversationId: convId, + assistantId: 'self', + pendingQuestion: pq, + broadcast: broadcastFn, + }); + + const msg = broadcastedMessages[0] as 
Record; + const title = msg.title as string; + + // Title must NOT start with the old static "Guardian question:" prefix + expect(title.startsWith('Guardian question:')).toBe(false); + + // Title must start with an emoji (code point > 127 or common emoji ranges) + const firstCodePoint = title.codePointAt(0)!; + expect(firstCodePoint).toBeGreaterThan(127); + }); + + test('broadcast includes questionText field matching the original question', async () => { + const convId = 'conv-dispatch-7'; + ensureConversation(convId); + + const questionText = 'What is the WiFi password?'; + const session = createCallSession({ + conversationId: convId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + const pq = createPendingQuestion(session.id, questionText); + + const broadcastedMessages: unknown[] = []; + const broadcastFn = (msg: unknown) => { broadcastedMessages.push(msg); }; + + await dispatchGuardianQuestion({ + callSessionId: session.id, + conversationId: convId, + assistantId: 'self', + pendingQuestion: pq, + broadcast: broadcastFn, + }); + + expect(broadcastedMessages).toHaveLength(1); + const msg = broadcastedMessages[0] as Record; + expect(msg.type).toBe('guardian_request_thread_created'); + expect(msg.questionText).toBe(questionText); + }); + + test('initial message in mac conversation contains question text from generative copy', async () => { + const convId = 'conv-dispatch-8'; + ensureConversation(convId); + + // Set mock copy to a known generative-style message + mockGuardianCopy = { + threadTitle: '\u{1F4DE} Live call: Gate code needed', + initialMessage: 'You have an active phone call that needs your help.\n\nThe caller is asking: What is the gate code?\n\nPlease reply with your answer to resume the call.', + }; + + const session = createCallSession({ + conversationId: convId, + provider: 'twilio', + fromNumber: '+15550001111', + toNumber: '+15550002222', + }); + const pq = createPendingQuestion(session.id, 'What is the gate 
code?'); + + const broadcastedMessages: unknown[] = []; + const broadcastFn = (msg: unknown) => { broadcastedMessages.push(msg); }; + + await dispatchGuardianQuestion({ + callSessionId: session.id, + conversationId: convId, + assistantId: 'self', + pendingQuestion: pq, + broadcast: broadcastFn, + }); + + const msg = broadcastedMessages[0] as Record; + const macConvId = msg.conversationId as string; + + const messages = getMessages(macConvId); + expect(messages.length).toBeGreaterThanOrEqual(1); + const content = messages[0].content; + // The generative copy should be used as the initial message + expect(content).toContain('What is the gate code?'); + expect(content).toContain('active phone call'); + }); }); diff --git a/assistant/src/__tests__/guardian-question-copy.test.ts b/assistant/src/__tests__/guardian-question-copy.test.ts new file mode 100644 index 00000000000..b97cdf8848b --- /dev/null +++ b/assistant/src/__tests__/guardian-question-copy.test.ts @@ -0,0 +1,47 @@ +import { describe, test, expect } from 'bun:test'; +import { buildFallbackCopy } from '../calls/guardian-question-copy.js'; + +describe('buildFallbackCopy', () => { + test('threadTitle starts with warning emoji', () => { + const result = buildFallbackCopy('What is the gate code?'); + expect(result.threadTitle.startsWith('\u26A0\uFE0F')).toBe(true); + }); + + test('threadTitle does not start with "Guardian question:"', () => { + const result = buildFallbackCopy('What is the gate code?'); + expect(result.threadTitle.startsWith('Guardian question:')).toBe(false); + }); + + test('threadTitle is under 80 characters for reasonable input', () => { + const result = buildFallbackCopy('What is the gate code?'); + expect(result.threadTitle.length).toBeLessThan(80); + }); + + test('initialMessage contains the question text', () => { + const question = 'Should I let the delivery driver in?'; + const result = buildFallbackCopy(question); + expect(result.initialMessage).toContain(question); + }); + + 
test('initialMessage contains "Reply to this message" instruction', () => { + const result = buildFallbackCopy('Any question here'); + expect(result.initialMessage).toContain('Reply to this message'); + }); + + test('very long question text gets truncated in title', () => { + const longQuestion = 'A'.repeat(200); + const result = buildFallbackCopy(longQuestion); + + // Title should use questionText.slice(0, 70), so the question portion is at most 70 chars + // Plus the emoji prefix and space, should still be well under 80 + expect(result.threadTitle.length).toBeLessThanOrEqual( + '\u26A0\uFE0F '.length + 70, + ); + + // The full question should NOT appear in the title + expect(result.threadTitle).not.toContain(longQuestion); + + // But the full question should still appear in the initial message + expect(result.initialMessage).toContain(longQuestion); + }); +}); diff --git a/assistant/src/__tests__/ipc-snapshot.test.ts b/assistant/src/__tests__/ipc-snapshot.test.ts index 507bb77d7dd..ff7038b6715 100644 --- a/assistant/src/__tests__/ipc-snapshot.test.ts +++ b/assistant/src/__tests__/ipc-snapshot.test.ts @@ -1685,6 +1685,7 @@ const serverMessages: Record = { requestId: 'req-guardian-001', callSessionId: 'call-001', title: 'Guardian action request', + questionText: 'What is the gate code?', }, subagent_spawned: { type: 'subagent_spawned', diff --git a/assistant/src/calls/guardian-dispatch.ts b/assistant/src/calls/guardian-dispatch.ts index e0c4afdd26c..6cd1ac31e61 100644 --- a/assistant/src/calls/guardian-dispatch.ts +++ b/assistant/src/calls/guardian-dispatch.ts @@ -24,6 +24,7 @@ import { addMessage } from '../memory/conversation-store.js'; import type { CallPendingQuestion } from './types.js'; import { readHttpToken } from '../util/platform.js'; import type { ServerMessage } from '../daemon/ipc-contract.js'; +import { generateGuardianCopy } from './guardian-question-copy.js'; const log = getLogger('guardian-dispatch'); @@ -104,10 +105,19 @@ export async function 
dispatchGuardianQuestion(params: GuardianDispatchParams): // Mac (internal) delivery — always created destinations.push({ channel: 'macos' }); + // Start LLM copy generation concurrently — only awaited in the macOS branch + // so external channels (Telegram, SMS) dispatch without LLM latency. + const guardianCopyPromise = generateGuardianCopy( + pendingQuestion.questionText, + request.requestCode, + ); + // Create delivery rows and dispatch for (const dest of destinations) { if (dest.channel === 'macos') { - // Create a dedicated server-side conversation for the mac guardian thread + // Create conversation and delivery row synchronously so they exist + // before awaiting LLM copy — prevents a race where an external channel + // reply resolves the request before the macOS delivery is created. const macConvKey = `asst:${assistantId}:guardian:request:${request.id}`; const { conversationId: macConversationId } = getOrCreateConversation(macConvKey); @@ -117,11 +127,14 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams): destinationConversationId: macConversationId, }); + // Now await LLM-generated copy for the message content and thread title + const guardianCopy = await guardianCopyPromise; + // Add the guardian question as the initial message in the thread addMessage( macConversationId, 'assistant', - JSON.stringify([{ type: 'text', text: `Your assistant needs your input during a phone call.\n\nQuestion: ${request.questionText}\n\nReply to this message with your answer.` }]), + JSON.stringify([{ type: 'text', text: guardianCopy.initialMessage }]), { userMessageChannel: 'voice', assistantMessageChannel: 'macos' }, ); @@ -132,7 +145,8 @@ export async function dispatchGuardianQuestion(params: GuardianDispatchParams): conversationId: macConversationId, requestId: request.id, callSessionId, - title: `Guardian question: ${pendingQuestion.questionText.slice(0, 80)}`, + title: guardianCopy.threadTitle, + questionText: request.questionText, } as 
ServerMessage); } updateDeliveryStatus(delivery.id, 'sent'); diff --git a/assistant/src/calls/guardian-question-copy.ts b/assistant/src/calls/guardian-question-copy.ts new file mode 100644 index 00000000000..6b4ba84bf22 --- /dev/null +++ b/assistant/src/calls/guardian-question-copy.ts @@ -0,0 +1,133 @@ +/** + * Generative copy for guardian question threads. + * + * Uses the configured provider to generate an attention-oriented emoji-prefixed + * thread title and a richer initial message. Falls back to deterministic copy + * when the provider is unavailable or generation fails/times out. + */ + +import { getLogger } from '../util/logger.js'; +import { + resolveConfiguredProvider, + createTimeout, + extractText, + userMessage, +} from '../providers/provider-send-message.js'; + +const log = getLogger('guardian-question-copy'); + +/** Timeout for the generative copy call (ms). */ +const GENERATION_TIMEOUT_MS = 5_000; + +export interface GuardianCopy { + threadTitle: string; + initialMessage: string; +} + +/** + * Build deterministic fallback copy when generation is unavailable or fails. + */ +export function buildFallbackCopy(questionText: string): GuardianCopy { + return { + threadTitle: `\u26A0\uFE0F ${questionText.slice(0, 70)}`, + initialMessage: [ + 'Your assistant needs your input during a phone call.', + '', + `Question: ${questionText}`, + '', + 'Reply to this message with your answer.', + ].join('\n'), + }; +} + +/** + * Generate guardian thread copy (title + initial message) via the configured + * LLM provider. Returns deterministic fallback when the provider is unavailable, + * generation times out, or any error occurs. 
+ */ +export async function generateGuardianCopy( + questionText: string, + requestCode?: string, +): Promise { + const fallback = buildFallbackCopy(questionText); + + // If no provider is configured, return fallback immediately + const resolved = resolveConfiguredProvider(); + if (!resolved) { + log.debug('No provider available for guardian copy generation, using fallback'); + return fallback; + } + + const { signal, cleanup } = createTimeout(GENERATION_TIMEOUT_MS); + + try { + const prompt = [ + 'Generate a thread title and initial message for a guardian question during a live phone call.', + '', + `Question: ${questionText}`, + ...(requestCode ? [`Reference code: ${requestCode}`] : []), + '', + 'Requirements:', + '- TITLE: An emoji-prefixed, attention-oriented, concise title (under 80 characters). Do NOT start with "Guardian question:". Use a relevant warning or alert emoji.', + '- MESSAGE: A clear initial message that includes the question text, mentions this is a live phone call waiting for the user\'s input, and asks them to reply with their answer.', + '', + 'Respond in exactly this format (no extra text):', + 'TITLE: ', + 'MESSAGE: ', + ].join('\n'); + + const response = await resolved.provider.sendMessage( + [userMessage(prompt)], + undefined, + undefined, + { signal, config: { modelIntent: 'latency-optimized' } }, + ); + + const text = extractText(response); + const parsed = parseGeneratedCopy(text); + + if (parsed) { + return parsed; + } + + log.warn({ raw: text }, 'Failed to parse generated guardian copy, using fallback'); + return fallback; + } catch (err) { + if (signal.aborted) { + log.warn('Guardian copy generation timed out, using fallback'); + } else { + log.warn({ err }, 'Guardian copy generation failed, using fallback'); + } + return fallback; + } finally { + cleanup(); + } +} + +/** + * Parse the structured TITLE/MESSAGE response from the model. + * Returns null if the format is not matched. 
+ */ +function parseGeneratedCopy(text: string): GuardianCopy | null { + const titleMatch = text.match(/^TITLE:\s*(.+)/m); + const messageMatch = text.match(/^MESSAGE:\s*([\s\S]+)/m); + + if (!titleMatch || !messageMatch) { + return null; + } + + const title = titleMatch[1].trim(); + const message = messageMatch[1].trim(); + + // Sanity checks: title must be non-empty and under 80 chars, message must be non-empty + if (!title || title.length > 80 || !message) { + return null; + } + + // Reject the old static prefix — the model is guided towards better titles but has final say + if (/^guardian question:/i.test(title)) { + return null; + } + + return { threadTitle: title, initialMessage: message }; +} diff --git a/assistant/src/daemon/ipc-contract/work-items.ts b/assistant/src/daemon/ipc-contract/work-items.ts index d5151e5aaa8..96dc12c01d8 100644 --- a/assistant/src/daemon/ipc-contract/work-items.ts +++ b/assistant/src/daemon/ipc-contract/work-items.ts @@ -221,4 +221,5 @@ export interface GuardianRequestThreadCreated { requestId: string; callSessionId: string; title: string; + questionText: string; } diff --git a/clients/macos/vellum-assistant/App/AppDelegate+Notifications.swift b/clients/macos/vellum-assistant/App/AppDelegate+Notifications.swift index f3e0e7b2bc2..bcb2e482b49 100644 --- a/clients/macos/vellum-assistant/App/AppDelegate+Notifications.swift +++ b/clients/macos/vellum-assistant/App/AppDelegate+Notifications.swift @@ -84,7 +84,19 @@ extension AppDelegate { options: [] ) - center.setNotificationCategories([activityCategory, toolConfirmationCategory, rideShotgunCategory, voiceResponseCategory, quickChatCategory]) + let viewGuardianAction = UNNotificationAction( + identifier: "VIEW_GUARDIAN", + title: "View", + options: [.foreground] + ) + let guardianRequestCategory = UNNotificationCategory( + identifier: "GUARDIAN_REQUEST", + actions: [viewGuardianAction], + intentIdentifiers: [], + options: [] + ) + + center.setNotificationCategories([activityCategory, 
toolConfirmationCategory, rideShotgunCategory, voiceResponseCategory, quickChatCategory, guardianRequestCategory]) } func registerBundledFonts() { @@ -165,7 +177,17 @@ extension AppDelegate: UNUserNotificationCenterDelegate { let conversationId = response.notification.request.content.userInfo["conversationId"] as? String await MainActor.run { guard !self.isAwaitingFirstLaunchReady else { return } - self.openQuickChatThread(conversationId: conversationId) + self.openConversationThread(conversationId: conversationId) + } + return + } + + // Handle guardian request notifications — open the guardian thread in the main window + if categoryId == "GUARDIAN_REQUEST" { + let conversationId = response.notification.request.content.userInfo["conversationId"] as? String + await MainActor.run { + guard !self.isAwaitingFirstLaunchReady else { return } + self.openConversationThread(conversationId: conversationId) } return } diff --git a/clients/macos/vellum-assistant/App/AppDelegate+Sessions.swift b/clients/macos/vellum-assistant/App/AppDelegate+Sessions.swift index 6cb6f416762..195b95f24be 100644 --- a/clients/macos/vellum-assistant/App/AppDelegate+Sessions.swift +++ b/clients/macos/vellum-assistant/App/AppDelegate+Sessions.swift @@ -402,9 +402,30 @@ extension AppDelegate { } } + func deliverGuardianRequestNotification(title: String, questionText: String, conversationId: String) { + let content = UNMutableNotificationContent() + content.title = title + content.body = String(questionText.prefix(200)) + content.sound = .default + content.categoryIdentifier = "GUARDIAN_REQUEST" + content.userInfo = ["conversationId": conversationId] + + let request = UNNotificationRequest( + identifier: "guardian-request-\(conversationId)", + content: content, + trigger: nil + ) + UNUserNotificationCenter.current().add(request) { error in + if let error { + log.error("Failed to post guardian request notification: \(error.localizedDescription)") + } + } + } + /// Opens the main window and navigates 
to the thread for the given conversation ID. /// Retries if the thread isn't populated yet (e.g., ThreadManager hasn't loaded it). - func openQuickChatThread(conversationId: String?) { + /// Used by Quick Chat, Guardian Request, and other notification deep links. + func openConversationThread(conversationId: String?) { showMainWindow() guard let conversationId else { return } diff --git a/clients/macos/vellum-assistant/App/AppDelegate.swift b/clients/macos/vellum-assistant/App/AppDelegate.swift index 3c3a15a6030..703d4ec3542 100644 --- a/clients/macos/vellum-assistant/App/AppDelegate.swift +++ b/clients/macos/vellum-assistant/App/AppDelegate.swift @@ -685,10 +685,20 @@ public final class AppDelegate: NSObject, NSApplicationDelegate { callSessionId: msg.callSessionId, title: msg.title ) - if let thread = self.mainWindow?.threadManager.threads.first(where: { $0.sessionId == msg.conversationId }) { - self.mainWindow?.threadManager.activeThreadId = thread.id + if NSApp.isActive { + // App is in foreground — select thread and show window immediately + if let thread = self.mainWindow?.threadManager.threads.first(where: { $0.sessionId == msg.conversationId }) { + self.mainWindow?.threadManager.activeThreadId = thread.id + } + self.showMainWindow() + } else { + // App is backgrounded — post native notification + self.deliverGuardianRequestNotification( + title: msg.title, + questionText: msg.questionText, + conversationId: msg.conversationId + ) } - self.showMainWindow() } // Handle escalation: text_qa -> computer_use via computer_use_request_control diff --git a/clients/shared/IPC/Generated/IPCContractGenerated.swift b/clients/shared/IPC/Generated/IPCContractGenerated.swift index e598c74ab0d..62374838bdc 100644 --- a/clients/shared/IPC/Generated/IPCContractGenerated.swift +++ b/clients/shared/IPC/Generated/IPCContractGenerated.swift @@ -1853,13 +1853,15 @@ public struct IPCGuardianRequestThreadCreated: Codable, Sendable { public let requestId: String public let 
callSessionId: String public let title: String + public let questionText: String - public init(type: String, conversationId: String, requestId: String, callSessionId: String, title: String) { + public init(type: String, conversationId: String, requestId: String, callSessionId: String, title: String, questionText: String) { self.type = type self.conversationId = conversationId self.requestId = requestId self.callSessionId = callSessionId self.title = title + self.questionText = questionText } }