diff --git a/assistant/src/daemon/handlers/sessions.ts b/assistant/src/daemon/handlers/sessions.ts index 288ecfb5955..9df93fc40a5 100644 --- a/assistant/src/daemon/handlers/sessions.ts +++ b/assistant/src/daemon/handlers/sessions.ts @@ -348,11 +348,16 @@ export function handleHistoryRequest( if (m.role === 'assistant' && m.id) { const linked = getAttachmentsForMessageUnscoped(m.id); if (linked.length > 0) { + // Skip embedding base64 data when a.dataBase64.length exceeds 512 * 1024 characters, to keep + // the history_response payload small enough for the client to decode reliably. Oversized + // entries are sent with data '' plus sizeBytes so the client can fetch them lazily via HTTP. NOTE(review): confirm every client attachment view handles empty data. + const MAX_INLINE_B64_SIZE = 512 * 1024; attachments = linked.map((a) => ({ id: a.id, filename: a.originalFilename, mimeType: a.mimeType, - data: a.dataBase64, + data: a.dataBase64.length > MAX_INLINE_B64_SIZE ? '' : a.dataBase64, + ...(a.dataBase64.length > MAX_INLINE_B64_SIZE ? { sizeBytes: a.sizeBytes } : {}), })); } } diff --git a/assistant/src/daemon/ipc-contract.ts b/assistant/src/daemon/ipc-contract.ts index b1bf9295427..c4c3cf7f9e7 100644 --- a/assistant/src/daemon/ipc-contract.ts +++ b/assistant/src/daemon/ipc-contract.ts @@ -43,6 +43,8 @@ export interface UserMessageAttachment { mimeType: string; data: string; extractedText?: string; + /** Original file size in bytes. Present when data was omitted from history_response to reduce payload size. */ + sizeBytes?: number; } export interface ConfirmationResponse { diff --git a/clients/macos/vellum-assistant/Features/Chat/ChatView.swift b/clients/macos/vellum-assistant/Features/Chat/ChatView.swift index 32493ce22b6..0a82ac406ee 100644 --- a/clients/macos/vellum-assistant/Features/Chat/ChatView.swift +++ b/clients/macos/vellum-assistant/Features/Chat/ChatView.swift @@ -94,6 +94,7 @@ struct ChatView: View { var mediaEmbedSettings: MediaEmbedResolverSettings? var isTemporaryChat: Bool = false var activeSubagents: [SubagentInfo] = [] + var daemonHttpPort: Int? 
/// Triggers auto-scroll when the last message's text length changes (e.g. during streaming). /// Sums utf8.count over each segment (O(1) per contiguous segment) instead of joining first, @@ -603,7 +604,8 @@ struct ChatView: View { onRegenerate: onRegenerate, onSurfaceAction: onSurfaceAction, onReportMessage: onReportMessage, - mediaEmbedSettings: mediaEmbedSettings + mediaEmbedSettings: mediaEmbedSettings, + daemonHttpPort: daemonHttpPort ) .id(message.id) .transition(.opacity.combined(with: .move(edge: .bottom))) @@ -987,6 +989,7 @@ private struct ChatBubble: View { let onSurfaceAction: (String, String, [String: AnyCodable]?) -> Void var onReportMessage: ((String?) -> Void)? var mediaEmbedSettings: MediaEmbedResolverSettings? + var daemonHttpPort: Int? @State private var appearance = AvatarAppearanceManager.shared @State private var isHovered = false @@ -1653,7 +1656,7 @@ private struct ChatBubble: View { if !partitioned.videos.isEmpty { VStack(alignment: .leading, spacing: VSpacing.sm) { ForEach(partitioned.videos) { attachment in - InlineVideoAttachmentView(attachment: attachment) + InlineVideoAttachmentView(attachment: attachment, daemonHttpPort: daemonHttpPort) } } } @@ -1962,7 +1965,7 @@ private struct ChatBubble: View { if !partitioned.videos.isEmpty { VStack(alignment: .leading, spacing: VSpacing.sm) { ForEach(partitioned.videos) { attachment in - InlineVideoAttachmentView(attachment: attachment) + InlineVideoAttachmentView(attachment: attachment, daemonHttpPort: daemonHttpPort) } } } diff --git a/clients/macos/vellum-assistant/Features/Chat/MediaEmbeds/InlineVideoAttachmentView.swift b/clients/macos/vellum-assistant/Features/Chat/MediaEmbeds/InlineVideoAttachmentView.swift index 2e5da481014..fa87a422f83 100644 --- a/clients/macos/vellum-assistant/Features/Chat/MediaEmbeds/InlineVideoAttachmentView.swift +++ b/clients/macos/vellum-assistant/Features/Chat/MediaEmbeds/InlineVideoAttachmentView.swift @@ -1,17 +1,22 @@ import AVKit import SwiftUI import 
VellumAssistantShared +import os + +private let log = Logger(subsystem: Bundle.main.bundleIdentifier ?? "com.vellum.vellum-assistant", category: "InlineVideoAttachment") /// Inline video player for file-based video attachments (e.g. video/mp4). /// /// Decodes base64 attachment data to a temp file and plays it with native /// AVPlayerView. Uses a click-to-play pattern to avoid auto-playing videos -/// on scroll. +/// on scroll. Supports lazy-loading large attachments via the daemon HTTP API. struct InlineVideoAttachmentView: View { let attachment: ChatAttachment + let daemonHttpPort: Int? @State private var player: AVPlayer? @State private var isPlaying = false + @State private var isLoading = false @State private var failed = false var body: some View { @@ -25,6 +30,8 @@ struct InlineVideoAttachmentView: View { if failed { failedView + } else if isLoading { + loadingView } else if let player, isPlaying { VideoPlayerView(player: player) .clipShape(RoundedRectangle(cornerRadius: VRadius.md)) @@ -59,6 +66,18 @@ struct InlineVideoAttachmentView: View { } } + private var loadingView: some View { + VStack(spacing: VSpacing.sm) { + ProgressView() + .controlSize(.regular) + + Text("Loading video...") + .font(VFont.caption) + .foregroundStyle(VColor.textSecondary) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } + private var failedView: some View { VStack(spacing: VSpacing.xs) { Image(systemName: "exclamationmark.triangle") @@ -85,7 +104,15 @@ struct InlineVideoAttachmentView: View { } private func prepareAndPlay() { - guard let data = Data(base64Encoded: attachment.data) else { + if attachment.isLazyLoad { + fetchAndPlay() + } else { + playFromBase64(attachment.data) + } + } + + private func playFromBase64(_ base64: String) { + guard let data = Data(base64Encoded: base64) else { failed = true return } @@ -104,12 +131,76 @@ struct InlineVideoAttachmentView: View { avPlayer.play() } + private func fetchAndPlay() { + guard let port = daemonHttpPort, let 
attachmentId = attachment.id.isEmpty ? nil : attachment.id else { + failed = true + return + } + + isLoading = true + Task { + do { + let base64 = try await fetchAttachmentData(port: port, attachmentId: attachmentId) + await MainActor.run { + isLoading = false + playFromBase64(base64) + } + } catch { + log.error("Failed to fetch attachment \(attachmentId): \(error.localizedDescription)") + await MainActor.run { + isLoading = false + failed = true + } + } + } + } + private func openInExternalPlayer() { - guard let data = Data(base64Encoded: attachment.data) else { return } - let fileURL = safeTempURL() - try? data.write(to: fileURL) - NSWorkspace.shared.open(fileURL) + if attachment.isLazyLoad { + guard let port = daemonHttpPort, let attachmentId = attachment.id.isEmpty ? nil : attachment.id else { return } + isLoading = true + Task { + do { + let base64 = try await fetchAttachmentData(port: port, attachmentId: attachmentId) + guard let data = Data(base64Encoded: base64) else { throw URLError(.cannotDecodeContentData) } // throw so the catch below resets isLoading + let fileURL = safeTempURL() + try data.write(to: fileURL) + await MainActor.run { + isLoading = false + NSWorkspace.shared.open(fileURL) + } + } catch { + await MainActor.run { isLoading = false } + } + } + } else { + guard let data = Data(base64Encoded: attachment.data) else { return } + let fileURL = safeTempURL() + try? data.write(to: fileURL) + NSWorkspace.shared.open(fileURL) + } + } +} + +/// Fetch attachment base64 data from the daemon HTTP endpoint. Throws `URLError` when the session token is missing, the URL cannot be built, or the response is not HTTP 200. +private func fetchAttachmentData(port: Int, attachmentId: String) async throws -> String { + guard let token = readSessionToken() else { + throw URLError(.userAuthenticationRequired) + } + guard let url = URL(string: "http://localhost:\(port)/v1/attachments/\(attachmentId)") else { throw URLError(.badURL) } + var request = URLRequest(url: url) + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + + let (data, response) = try await URLSession.shared.data(for: request) + guard let http = response as? 
HTTPURLResponse, http.statusCode == 200 else { + throw URLError(.badServerResponse) + } + + struct AttachmentResponse: Decodable { + let data: String } + let decoded = try JSONDecoder().decode(AttachmentResponse.self, from: data) + return decoded.data } /// NSViewRepresentable wrapper for AVPlayerView. diff --git a/clients/macos/vellum-assistant/Features/MainWindow/MainWindowView.swift b/clients/macos/vellum-assistant/Features/MainWindow/MainWindowView.swift index d6dca00fe96..5fd0c8b8fe4 100644 --- a/clients/macos/vellum-assistant/Features/MainWindow/MainWindowView.swift +++ b/clients/macos/vellum-assistant/Features/MainWindow/MainWindowView.swift @@ -1667,7 +1667,8 @@ private struct ActiveChatViewWrapper: View { allowedDomains: settingsStore.mediaEmbedVideoAllowlistDomains ), isTemporaryChat: isTemporaryChat, - activeSubagents: viewModel.activeSubagents + activeSubagents: viewModel.activeSubagents, + daemonHttpPort: daemonClient.httpPort ) } } diff --git a/clients/shared/Features/Chat/ChatMessage.swift b/clients/shared/Features/Chat/ChatMessage.swift index 652bcc1a713..fa853d08b33 100644 --- a/clients/shared/Features/Chat/ChatMessage.swift +++ b/clients/shared/Features/Chat/ChatMessage.swift @@ -709,7 +709,8 @@ public struct ChatAttachment: Identifiable { public let id: String public let filename: String public let mimeType: String - /// Base64-encoded file data. + /// Base64-encoded file data. Empty when the attachment was too large to embed + /// in the history_response — check `isLazyLoad` and fetch the bytes from the daemon HTTP API instead. public let data: String /// Pre-rendered thumbnail for image attachments (resized to 120px max dimension). public let thumbnailData: Data? @@ -717,6 +718,9 @@ public struct ChatAttachment: Identifiable { /// Swift's String.count iterates the entire string to count grapheme clusters, /// which is expensive for multi-MB base64 strings on every SwiftUI render pass. public let dataLength: Int + /// Original file size in bytes. 
Non-nil when `data` is empty because the + /// attachment was too large to inline in the history response. + public let sizeBytes: Int? /// Pre-decoded thumbnail image, cached to avoid decoding PNG data on every /// SwiftUI render pass (each keystroke triggers a re-evaluation of the composer). #if os(macOS) @@ -727,24 +731,30 @@ public struct ChatAttachment: Identifiable { #error("Unsupported platform") #endif + /// Whether this attachment's binary data was omitted to keep the IPC payload small. + /// The client should fetch it lazily via the HTTP endpoint when the user interacts. + public var isLazyLoad: Bool { data.isEmpty && sizeBytes != nil } + #if os(macOS) - public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, thumbnailImage: NSImage?) { + public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, sizeBytes: Int? = nil, thumbnailImage: NSImage?) { self.id = id self.filename = filename self.mimeType = mimeType self.data = data self.thumbnailData = thumbnailData self.dataLength = dataLength + self.sizeBytes = sizeBytes self.thumbnailImage = thumbnailImage } #elseif os(iOS) - public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, thumbnailImage: UIImage?) { + public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, sizeBytes: Int? = nil, thumbnailImage: UIImage?) 
{ self.id = id self.filename = filename self.mimeType = mimeType self.data = data self.thumbnailData = thumbnailData self.dataLength = dataLength + self.sizeBytes = sizeBytes self.thumbnailImage = thumbnailImage } #else diff --git a/clients/shared/Features/Chat/ChatViewModel+MessageHandling.swift b/clients/shared/Features/Chat/ChatViewModel+MessageHandling.swift index b832b636bb2..7c8fff56a97 100644 --- a/clients/shared/Features/Chat/ChatViewModel+MessageHandling.swift +++ b/clients/shared/Features/Chat/ChatViewModel+MessageHandling.swift @@ -275,6 +275,7 @@ extension ChatViewModel { let id = ipc.id ?? UUID().uuidString let base64 = ipc.data let dataLength = base64.count + let sizeBytes: Int? = ipc.sizeBytes.flatMap { Int(exactly: $0) } var thumbnailData: Data? #if os(macOS) @@ -285,7 +286,7 @@ extension ChatViewModel { #error("Unsupported platform") #endif - if ipc.mimeType.hasPrefix("image/"), let rawData = Data(base64Encoded: base64) { + if ipc.mimeType.hasPrefix("image/"), !base64.isEmpty, let rawData = Data(base64Encoded: base64) { thumbnailData = Self.generateThumbnail(from: rawData, maxDimension: 120) #if os(macOS) thumbnailImage = thumbnailData.flatMap { NSImage(data: $0) } @@ -301,6 +302,7 @@ extension ChatViewModel { data: base64, thumbnailData: thumbnailData, dataLength: dataLength, + sizeBytes: sizeBytes, thumbnailImage: thumbnailImage ) } diff --git a/clients/shared/IPC/Generated/IPCContractGenerated.swift b/clients/shared/IPC/Generated/IPCContractGenerated.swift index aa6ea5bcd92..7244c694371 100644 --- a/clients/shared/IPC/Generated/IPCContractGenerated.swift +++ b/clients/shared/IPC/Generated/IPCContractGenerated.swift @@ -1847,6 +1847,8 @@ public struct IPCUserMessageAttachment: Codable, Sendable { public let mimeType: String public let data: String public let extractedText: String? + /// Original file size in bytes. Present when data was omitted from history_response to reduce payload size. + public let sizeBytes: Int? 
} public struct IPCUserMessageEcho: Codable, Sendable {