Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 59 additions & 36 deletions assistant/src/__tests__/channel-approval-routes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1524,8 +1524,8 @@ describe('SMS channel approval decisions', () => {

test('non-decision SMS message during pending approval triggers reminder with plain-text fallback', async () => {
const orchestrator = makeMockOrchestrator();
const deliverSpy = spyOn(gatewayClient, 'deliverApprovalPrompt').mockResolvedValue(undefined);
const replySpy = spyOn(gatewayClient, 'deliverChannelReply').mockResolvedValue(undefined);
const deliverSpy = spyOn(gatewayClient, 'deliverChannelReply').mockResolvedValue(undefined);
const approvalSpy = spyOn(gatewayClient, 'deliverApprovalPrompt').mockResolvedValue(undefined);

const initReq = makeSmsInboundRequest({ content: 'init' });
await handleChannelInbound(initReq, noopProcessMessage, 'token', orchestrator);
Expand All @@ -1545,15 +1545,21 @@ describe('SMS channel approval decisions', () => {
expect(body.accepted).toBe(true);
expect(body.approval).toBe('reminder_sent');

// SMS is a non-rich channel so the delivered text should include plain-text fallback
// SMS is non-rich: reminder is delivered as plain text without approval metadata.
expect(deliverSpy).toHaveBeenCalled();
const callArgs = deliverSpy.mock.calls[0];
const deliveredText = callArgs[2] as string;
expect(approvalSpy).not.toHaveBeenCalled();
const reminderCall = deliverSpy.mock.calls.find(
(call) => typeof call[1] === 'object' && (call[1] as { chatId?: string }).chatId === 'sms-chat-123',
);
expect(reminderCall).toBeDefined();
const reminderPayload = reminderCall![1] as { text?: string; approval?: unknown };
const deliveredText = reminderPayload.text ?? '';
expect(deliveredText).toContain("I'm still waiting");
expect(deliveredText).toContain('Reply "yes"');
expect(reminderPayload.approval).toBeUndefined();

deliverSpy.mockRestore();
replySpy.mockRestore();
approvalSpy.mockRestore();
});

test('sourceChannel "sms" is passed to orchestrator.startRun', async () => {
Expand Down Expand Up @@ -1773,8 +1779,8 @@ describe('plain-text fallback surfacing for non-rich channels', () => {

test('reminder prompt includes plainTextFallback for non-rich channel (http-api)', async () => {
const orchestrator = makeMockOrchestrator();
const deliverSpy = spyOn(gatewayClient, 'deliverApprovalPrompt').mockResolvedValue(undefined);
const replySpy = spyOn(gatewayClient, 'deliverChannelReply').mockResolvedValue(undefined);
const deliverSpy = spyOn(gatewayClient, 'deliverChannelReply').mockResolvedValue(undefined);
const approvalSpy = spyOn(gatewayClient, 'deliverApprovalPrompt').mockResolvedValue(undefined);

// Establish the conversation using http-api (non-rich channel)
const initReq = makeInboundRequest({ content: 'init', sourceChannel: 'http-api' });
Expand All @@ -1796,17 +1802,23 @@ describe('plain-text fallback surfacing for non-rich channels', () => {
expect(body.accepted).toBe(true);
expect(body.approval).toBe('reminder_sent');

// The delivered text should include the plainTextFallback instructions
// Non-rich channel uses plain-text delivery with fallback instructions.
expect(deliverSpy).toHaveBeenCalled();
const callArgs = deliverSpy.mock.calls[0];
const deliveredText = callArgs[2] as string;
expect(approvalSpy).not.toHaveBeenCalled();
const reminderCall = deliverSpy.mock.calls.find(
(call) => typeof call[1] === 'object' && (call[1] as { chatId?: string }).chatId === 'chat-123',
);
expect(reminderCall).toBeDefined();
const reminderPayload = reminderCall![1] as { text?: string; approval?: unknown };
const deliveredText = reminderPayload.text ?? '';
// For non-rich channels, the text should contain both the reminder prefix
// AND the plainTextFallback instructions (e.g. "Reply yes to approve")
expect(deliveredText).toContain("I'm still waiting");
expect(deliveredText).toContain('Reply "yes"');
expect(reminderPayload.approval).toBeUndefined();

deliverSpy.mockRestore();
replySpy.mockRestore();
approvalSpy.mockRestore();
});

test('reminder prompt does NOT include plainTextFallback for telegram (rich channel)', async () => {
Expand Down Expand Up @@ -2188,14 +2200,14 @@ describe('guardian-with-binding path regression', () => {
});

// ═══════════════════════════════════════════════════════════════════════════
// 20. Guardian delivery failure denial (WS-2)
// 20. Guardian rich-delivery failure fallback (WS-2)
// ═══════════════════════════════════════════════════════════════════════════

describe('guardian delivery failure → denial', () => {
describe('guardian delivery failure → text fallback', () => {
beforeEach(() => {
});

test('delivery failure denies run and notifies requester', async () => {
test('rich delivery failure falls back to plain text and keeps request pending', async () => {
createBinding({
assistantId: 'self',
channel: 'telegram',
Expand All @@ -2220,29 +2232,30 @@ describe('guardian delivery failure → denial', () => {
await handleChannelInbound(req, noopProcessMessage, 'token', orchestrator);
await new Promise((resolve) => setTimeout(resolve, 1200));

// The run should have been denied
expect(orchestrator.submitDecision).toHaveBeenCalled();
const decisionArgs = (orchestrator.submitDecision as ReturnType<typeof mock>).mock.calls[0];
expect(decisionArgs[1]).toBe('deny');
// Rich button delivery failed, but plain-text fallback succeeded.
expect(orchestrator.submitDecision).not.toHaveBeenCalled();
expect(approvalSpy).toHaveBeenCalled();

// Requester should have been notified that delivery failed
const failureCalls = deliverSpy.mock.calls.filter(
(call) => typeof call[1] === 'object' && (call[1] as { text?: string }).text?.toLowerCase().includes('denied'),
// Guardian should have received a parser-compatible plain-text approval prompt.
const guardianPromptCalls = deliverSpy.mock.calls.filter(
(call) =>
typeof call[1] === 'object' &&
(call[1] as { chatId?: string; text?: string }).chatId === 'guardian-chat-df' &&
((call[1] as { text?: string }).text ?? '').includes('Reply "yes"'),
);
expect(failureCalls.length).toBeGreaterThanOrEqual(1);
expect(guardianPromptCalls.length).toBeGreaterThanOrEqual(1);

// The guardian_request_forwarded success notice should NOT have been
// delivered (since delivery failed).
// Requester should still get the forwarded notice once fallback delivery works.
const successCalls = deliverSpy.mock.calls.filter(
(call) => typeof call[1] === 'object' && (call[1] as { text?: string }).text?.toLowerCase().includes('forwarded'),
);
expect(successCalls.length).toBe(0);
expect(successCalls.length).toBeGreaterThanOrEqual(1);

deliverSpy.mockRestore();
approvalSpy.mockRestore();
});

test('no pending/unresolved approvals remain after delivery failure', async () => {
test('terminal run resolution clears approvals even when rich delivery falls back to text', async () => {
createBinding({
assistantId: 'self',
channel: 'telegram',
Expand All @@ -2265,15 +2278,19 @@ describe('guardian delivery failure → denial', () => {
await handleChannelInbound(req, noopProcessMessage, 'token', orchestrator);
await new Promise((resolve) => setTimeout(resolve, 1200));

// Rich delivery failure alone should not apply an explicit deny decision.
expect(orchestrator.submitDecision).not.toHaveBeenCalled();

// Verify the run ID was created
const runId = orchestrator.realRunId();
expect(runId).toBeTruthy();

// After delivery failure, there should be NO pending approval for the run
// This test orchestrator transitions the run to a terminal failed state,
// which resolves the approval record via run-completion cleanup.
const pendingApproval = getPendingApprovalForRun(runId!);
expect(pendingApproval).toBeNull();

// There should also be NO unresolved approval (it was set to 'denied')
// No unresolved approval should remain after terminal resolution.
const unresolvedApproval = getUnresolvedApprovalForRun(runId!);
expect(unresolvedApproval).toBeNull();

Expand All @@ -2283,14 +2300,14 @@ describe('guardian delivery failure → denial', () => {
});

// ═══════════════════════════════════════════════════════════════════════════
// 20b. Standard approval prompt delivery failure → auto-deny (WS-B)
// 20b. Standard rich prompt delivery failure → text fallback (WS-B)
// ═══════════════════════════════════════════════════════════════════════════

describe('standard approval prompt delivery failure → auto-deny', () => {
describe('standard approval prompt delivery failure → text fallback', () => {
beforeEach(() => {
});

test('standard prompt delivery failure auto-denies the run (fail-closed)', async () => {
test('standard prompt rich-delivery failure falls back to plain text without auto-deny', async () => {
const deliverSpy = spyOn(gatewayClient, 'deliverChannelReply').mockResolvedValue(undefined);
// Make the approval prompt delivery fail for the standard (self-approval) path
const approvalSpy = spyOn(gatewayClient, 'deliverApprovalPrompt').mockRejectedValue(
Expand All @@ -2317,10 +2334,16 @@ describe('standard approval prompt delivery failure → auto-deny', () => {
await handleChannelInbound(req, noopProcessMessage, 'token', orchestrator);
await new Promise((resolve) => setTimeout(resolve, 1200));

// The run should have been auto-denied because the prompt could not be delivered
expect(orchestrator.submitDecision).toHaveBeenCalled();
const decisionArgs = (orchestrator.submitDecision as ReturnType<typeof mock>).mock.calls[0];
expect(decisionArgs[1]).toBe('deny');
expect(approvalSpy).toHaveBeenCalled();
expect(orchestrator.submitDecision).not.toHaveBeenCalled();

const fallbackCalls = deliverSpy.mock.calls.filter(
(call) =>
typeof call[1] === 'object' &&
(call[1] as { chatId?: string; text?: string }).chatId === 'chat-123' &&
((call[1] as { text?: string }).text ?? '').includes('Reply "yes"'),
);
expect(fallbackCalls.length).toBeGreaterThanOrEqual(1);

deliverSpy.mockRestore();
approvalSpy.mockRestore();
Expand Down
128 changes: 125 additions & 3 deletions assistant/src/runtime/approval-message-composer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,22 @@
*
* Generates approval prompt text through a priority chain:
* 1. Assistant preface (macOS parity — reuse existing assistant text)
* 2. Deterministic fallback templates (natural, scenario-specific messages)
*
* A provider-backed generation layer can be inserted between 1 and 2 later.
* 2. Provider-generated rewrite of deterministic fallback text
* 3. Deterministic fallback templates (natural, scenario-specific messages)
*/
import { getConfig } from '../config/loader.js';
import { getFailoverProvider, listProviders } from '../providers/registry.js';
import type { Provider } from '../providers/types.js';
import { getLogger } from '../util/logger.js';

const log = getLogger('approval-message-composer');
const APPROVAL_COPY_TIMEOUT_MS = 4_000;
const APPROVAL_COPY_MAX_TOKENS = 180;
const APPROVAL_COPY_SYSTEM_PROMPT =
'You are an assistant writing one user-facing message about permissions/approval state. '
+ 'Keep it concise, natural, and actionable. Preserve factual details exactly. '
+ 'Do not mention internal systems, scenario IDs, or policy engine details. '
+ 'Return plain text only.';

// ---------------------------------------------------------------------------
// Types
Expand Down Expand Up @@ -48,6 +60,22 @@ export interface ApprovalMessageContext {
failureReason?: string;
}

export interface ComposeApprovalMessageGenerativeOptions {
/**
* Optional fallback message to use when generation fails. If omitted,
* the deterministic scenario fallback is used.
*/
fallbackText?: string;
/**
* Require these standalone words in the generated output (case-insensitive).
* Useful for plain-text decision flows where parser-compatible keywords
* like yes/no/always must be present.
*/
requiredKeywords?: string[];
timeoutMs?: number;
maxTokens?: number;
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
Expand All @@ -65,6 +93,100 @@ export function composeApprovalMessage(context: ApprovalMessageContext): string
return getFallbackMessage(context);
}

function escapeRegExp(input: string): string {
return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

function includesRequiredKeywords(text: string, requiredKeywords: string[] | undefined): boolean {
if (!requiredKeywords || requiredKeywords.length === 0) return true;
return requiredKeywords.every((keyword) => {
const re = new RegExp(`\\b${escapeRegExp(keyword)}\\b`, 'i');
return re.test(text);
});
}

function buildGenerationPrompt(
context: ApprovalMessageContext,
fallbackText: string,
requiredKeywords: string[] | undefined,
): string {
const keywordClause = requiredKeywords && requiredKeywords.length > 0
? `Required words to include (as standalone words): ${requiredKeywords.join(', ')}.\n`
: '';
return [
'Rewrite the following approval/guardian message as a natural assistant reply to the user.',
'Keep the same concrete facts and next-step guidance.',
keywordClause,
`Context JSON: ${JSON.stringify(context)}`,
`Fallback message: ${fallbackText}`,
].filter(Boolean).join('\n\n');
}

async function generateApprovalMessage(
provider: Provider,
context: ApprovalMessageContext,
fallbackText: string,
options: ComposeApprovalMessageGenerativeOptions,
): Promise<string | null> {
const requiredKeywords = options.requiredKeywords?.map((kw) => kw.trim()).filter((kw) => kw.length > 0);
const prompt = buildGenerationPrompt(context, fallbackText, requiredKeywords);
const response = await provider.sendMessage(
[{ role: 'user', content: [{ type: 'text', text: prompt }] }],
[],
APPROVAL_COPY_SYSTEM_PROMPT,
{
config: {
max_tokens: options.maxTokens ?? APPROVAL_COPY_MAX_TOKENS,
},
signal: AbortSignal.timeout(options.timeoutMs ?? APPROVAL_COPY_TIMEOUT_MS),
},
);

const block = response.content.find((entry) => entry.type === 'text');
const text = block && 'text' in block ? block.text.trim() : '';
if (!text) return null;
const cleaned = text
.replace(/^["'`]+/, '')
.replace(/["'`]+$/, '')
.trim();
if (!cleaned) return null;
if (!includesRequiredKeywords(cleaned, requiredKeywords)) return null;
return cleaned;
}

/**
* Compose user-facing approval copy using the active provider when available,
* with deterministic fallback for reliability.
*/
export async function composeApprovalMessageGenerative(
context: ApprovalMessageContext,
options: ComposeApprovalMessageGenerativeOptions = {},
): Promise<string> {
if (context.assistantPreface && context.assistantPreface.trim().length > 0) {
return context.assistantPreface;
}

const fallbackText = options.fallbackText?.trim() || getFallbackMessage(context);

if (process.env.NODE_ENV === 'test') {
return fallbackText;
}

try {
const config = getConfig();
if (!listProviders().includes(config.provider)) {
return fallbackText;
}
const provider = getFailoverProvider(config.provider, config.providerOrder);
const generated = await generateApprovalMessage(provider, context, fallbackText, options);
if (generated) return generated;
} catch (err) {
log.warn({ err, scenario: context.scenario }, 'Failed to generate approval copy, using fallback');
}

return fallbackText;
}

// ---------------------------------------------------------------------------
// Deterministic fallback templates
// ---------------------------------------------------------------------------
Expand Down
Loading
Loading