diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 7a7eb51ffc9..2c312bbabe3 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -96,6 +96,7 @@ graph TB GW_ROUTE["Route Resolver
chat_id → user_id → default"] GW_FORWARD["Runtime Client
POST /channels/inbound"] GW_REPLY["Send Reply
Telegram sendMessage"] + GW_ATTACH["Send Attachments
sendPhoto / sendDocument"] GW_PROXY["Runtime Proxy
(optional, bearer auth)"] GW_PROBES["/healthz + /readyz
k8s liveness/readiness"] end @@ -195,6 +196,7 @@ graph TB GW_ROUTE --> GW_FORWARD GW_FORWARD -->|"HTTP"| HTTP_SERVER GW_REPLY -->|"Telegram API"| GW_WEBHOOK + GW_ATTACH -->|"download from runtime
+ upload to Telegram"| GW_WEBHOOK %% Gateway flow — Runtime proxy path (optional) GW_PROXY -->|"HTTP (forwarded)"| HTTP_SERVER @@ -661,7 +663,7 @@ graph LR S3["cu_error
message"] S4["assistant_text_delta
streaming text"] S5["assistant_thinking_delta
streaming thinking"] - S6["message_complete
usage stats"] + S6["message_complete
usage stats, attachments?"] S7["ambient_result
decision, summary/suggestion"] S8["confirmation_request
tool, risk_level,
executionTarget"] S9["memory_recalled
context segments"] @@ -669,7 +671,7 @@ graph LR S11["generation_cancelled"] S12["message_queued
position in queue"] S13["message_dequeued
queue drained"] - S14["generation_handoff
sessionId, requestId?,
queuedCount"] + S14["generation_handoff
sessionId, requestId?,
queuedCount, attachments?"] S15["trace_event
eventId, sessionId, requestId?,
timestampMs, sequence, kind,
status?, summary, attributes?"] S16["session_error
sessionId, code,
userMessage, retryable,
debugDetails?"] end diff --git a/README.md b/README.md index 9deb280fe07..7c981cc6481 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,47 @@ Host tools (`host_bash`, `host_file_read`, `host_file_write`, `host_file_edit`) Run `vellum doctor` for a full diagnostic check including sandbox backend status. +## Assistant Attachments + +The assistant can attach files and images to its replies. Attachments flow through three delivery channels: + +### Desktop (IPC) + +Attachments are sent inline (base64) in `message_complete`, `generation_handoff`, and `history_response` IPC messages. The macOS app renders thumbnails for images and displays file metadata for documents. + +### Runtime HTTP API + +The `GET /v1/assistants/:id/messages` endpoint returns attachment metadata on each message: + +```json +{ + "id": "att_xxx", + "filename": "chart.png", + "mimeType": "image/png", + "sizeBytes": 12345, + "kind": "image" +} +``` + +Fetch the full attachment payload (including base64-encoded data) via: + +``` +GET /v1/assistants/:assistantId/attachments/:attachmentId +``` + +### Telegram + +The gateway downloads attachments from the runtime API and delivers them via Telegram's `sendPhoto` (images) or `sendDocument` (other files). Oversized attachments (exceeding `GATEWAY_MAX_ATTACHMENT_BYTES`, default 20 MB) are skipped. Partial failures send a user-visible notice listing undelivered files. + +### Attachment Sources + +The assistant creates attachments from two sources: + +1. **Directives**: self-closing XML attachment tags in the response text (attributes: `source`, `path`, and optional `filename` / `mime_type`). Sandbox paths are relative to the working directory; host paths must be absolute and require user approval. +2. **Tool output**: Image and file content blocks from tool results are automatically converted into attachments. + +Limits: up to 5 attachments per turn, 20 MB each. + ## Remote Access Access a remote assistant daemon from your local machine via SSH. 
diff --git a/assistant/src/config/system-prompt.ts b/assistant/src/config/system-prompt.ts index 0f40f3c1d3b..8ad717ef2d7 100644 --- a/assistant/src/config/system-prompt.ts +++ b/assistant/src/config/system-prompt.ts @@ -58,10 +58,30 @@ export function buildSystemPrompt(): string { if (soul) parts.push(soul); if (user) parts.push(user); parts.push(buildConfigSection(baseDir)); + parts.push(buildAttachmentSection()); return appendSkillsCatalog(parts.join('\n\n')); } +function buildAttachmentSection(): string { + return [ + '## Sending Files and Images', + '', + 'To attach a file or image to your reply, include a self-closing XML tag in your response text:', + '', + '```', + '', + '```', + '', + '- `source`: `sandbox` (default, files inside the sandbox working directory) or `host` (absolute paths on the host filesystem — requires user approval).', + '- `path`: Required. Relative path for sandbox, absolute path for host.', + '- `filename`: Optional override for the delivered filename (defaults to the basename of the path).', + '- `mime_type`: Optional MIME type override (inferred from the file extension if omitted).', + '', + 'Limits: up to 5 attachments per turn, 20 MB each. Tool outputs that produce image or file content blocks are also automatically converted into attachments.', + ].join('\n'); +} + function buildConfigSection(configDir: string): string { return [ '## Configuration', diff --git a/gateway/README.md b/gateway/README.md index 9a97ee64bef..305dc2ffc11 100644 --- a/gateway/README.md +++ b/gateway/README.md @@ -108,6 +108,18 @@ curl -i -X POST http://localhost:7830/webhooks/telegram - The `host` header is not forwarded to upstream. - Upstream connection failures return `502 Bad Gateway`. 
+## Outbound Attachments (Telegram) + +When the assistant includes attachments in a reply, the gateway downloads each attachment from the runtime API and delivers it to the Telegram chat: + +- **Images** (`image/*` MIME types) are sent via `sendPhoto` (multipart form upload). +- **Other files** are sent via `sendDocument` (multipart form upload). +- **Oversized** attachments (exceeding `GATEWAY_MAX_ATTACHMENT_BYTES`, default 20 MB) are silently skipped. +- **Partial failures** are handled gracefully: each attachment is attempted independently. If any fail, a single summary notice is sent to the chat listing the undelivered filenames. +- **Concurrency** is controlled by `GATEWAY_MAX_ATTACHMENT_CONCURRENCY` (default 3). + +Text and attachments are sent separately — the text reply goes first via `sendMessage`, then each attachment follows. + ## Health & Readiness Probes | Endpoint | Method | Behavior |