Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion assistant/src/daemon/handlers/sessions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,16 @@ export function handleHistoryRequest(
if (m.role === 'assistant' && m.id) {
const linked = getAttachmentsForMessageUnscoped(m.id);
if (linked.length > 0) {
// Skip embedding base64 data when the encoded string exceeds 512K characters
// (about 384KB of raw bytes) to keep the history_response payload small enough
// for the client to decode reliably. The client fetches large attachment data lazily via HTTP.
const MAX_INLINE_B64_SIZE = 512 * 1024;
attachments = linked.map((a) => ({
id: a.id,
filename: a.originalFilename,
mimeType: a.mimeType,
data: a.dataBase64,
data: a.dataBase64.length > MAX_INLINE_B64_SIZE ? '' : a.dataBase64,
...(a.dataBase64.length > MAX_INLINE_B64_SIZE ? { sizeBytes: a.sizeBytes } : {}),
Comment on lines +359 to +360
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep non-video attachment payloads inline until lazy loaders exist

The history handler now strips base64 data for every attachment above 512KB, but only video attachments gained a lazy-fetch path in this commit. Existing image/file rendering in ChatView still depends on inline attachment.data (thumbnail/open-in-preview/file-size paths), so large non-video history attachments become unusable or misrepresented (for example, falling back to generic chips with 0 B). Limit omission to video MIME types for now, or add lazy fetch support for the other attachment UIs before dropping inline data.

Useful? React with 👍 / 👎.

}));
}
}
Expand Down
2 changes: 2 additions & 0 deletions assistant/src/daemon/ipc-contract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ export interface UserMessageAttachment {
mimeType: string;
data: string;
extractedText?: string;
/** Original file size in bytes. Present when data was omitted from history_response to reduce payload size. */
sizeBytes?: number;
}

export interface ConfirmationResponse {
Expand Down
9 changes: 6 additions & 3 deletions clients/macos/vellum-assistant/Features/Chat/ChatView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ struct ChatView: View {
var mediaEmbedSettings: MediaEmbedResolverSettings?
var isTemporaryChat: Bool = false
var activeSubagents: [SubagentInfo] = []
var daemonHttpPort: Int?

/// Triggers auto-scroll when the last message's text length changes (e.g. during streaming).
/// Sums utf8.count over each segment (O(1) per contiguous segment) instead of joining first,
Expand Down Expand Up @@ -603,7 +604,8 @@ struct ChatView: View {
onRegenerate: onRegenerate,
onSurfaceAction: onSurfaceAction,
onReportMessage: onReportMessage,
mediaEmbedSettings: mediaEmbedSettings
mediaEmbedSettings: mediaEmbedSettings,
daemonHttpPort: daemonHttpPort
)
.id(message.id)
.transition(.opacity.combined(with: .move(edge: .bottom)))
Expand Down Expand Up @@ -987,6 +989,7 @@ private struct ChatBubble: View {
let onSurfaceAction: (String, String, [String: AnyCodable]?) -> Void
var onReportMessage: ((String?) -> Void)?
var mediaEmbedSettings: MediaEmbedResolverSettings?
var daemonHttpPort: Int?

@State private var appearance = AvatarAppearanceManager.shared
@State private var isHovered = false
Expand Down Expand Up @@ -1653,7 +1656,7 @@ private struct ChatBubble: View {
if !partitioned.videos.isEmpty {
VStack(alignment: .leading, spacing: VSpacing.sm) {
ForEach(partitioned.videos) { attachment in
InlineVideoAttachmentView(attachment: attachment)
InlineVideoAttachmentView(attachment: attachment, daemonHttpPort: daemonHttpPort)
}
}
}
Expand Down Expand Up @@ -1962,7 +1965,7 @@ private struct ChatBubble: View {
if !partitioned.videos.isEmpty {
VStack(alignment: .leading, spacing: VSpacing.sm) {
ForEach(partitioned.videos) { attachment in
InlineVideoAttachmentView(attachment: attachment)
InlineVideoAttachmentView(attachment: attachment, daemonHttpPort: daemonHttpPort)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
import AVKit
import SwiftUI
import VellumAssistantShared
import os

private let log = Logger(subsystem: Bundle.main.bundleIdentifier ?? "com.vellum.vellum-assistant", category: "InlineVideoAttachment")

/// Inline video player for file-based video attachments (e.g. video/mp4).
///
/// Decodes base64 attachment data to a temp file and plays it with native
/// AVPlayerView. Uses a click-to-play pattern to avoid auto-playing videos
/// on scroll.
/// on scroll. Supports lazy-loading large attachments via the daemon HTTP API.
struct InlineVideoAttachmentView: View {
let attachment: ChatAttachment
let daemonHttpPort: Int?

@State private var player: AVPlayer?
@State private var isPlaying = false
@State private var isLoading = false
@State private var failed = false

var body: some View {
Expand All @@ -25,6 +30,8 @@ struct InlineVideoAttachmentView: View {

if failed {
failedView
} else if isLoading {
loadingView
} else if let player, isPlaying {
VideoPlayerView(player: player)
.clipShape(RoundedRectangle(cornerRadius: VRadius.md))
Expand Down Expand Up @@ -59,6 +66,18 @@ struct InlineVideoAttachmentView: View {
}
}

/// Placeholder shown while `isLoading` is true: a progress spinner stacked
/// above a "Loading video..." caption.
private var loadingView: some View {
VStack(spacing: VSpacing.sm) {
ProgressView()
.controlSize(.regular)

Text("Loading video...")
.font(VFont.caption)
.foregroundStyle(VColor.textSecondary)
}
// Let the stack grow to whatever space the parent offers in both axes.
.frame(maxWidth: .infinity, maxHeight: .infinity)
}

private var failedView: some View {
VStack(spacing: VSpacing.xs) {
Image(systemName: "exclamationmark.triangle")
Expand All @@ -85,7 +104,15 @@ struct InlineVideoAttachmentView: View {
}

private func prepareAndPlay() {
guard let data = Data(base64Encoded: attachment.data) else {
if attachment.isLazyLoad {
fetchAndPlay()
} else {
playFromBase64(attachment.data)
}
}

private func playFromBase64(_ base64: String) {
guard let data = Data(base64Encoded: base64) else {
failed = true
return
}
Expand All @@ -104,12 +131,76 @@ struct InlineVideoAttachmentView: View {
avPlayer.play()
}

/// Downloads the attachment's base64 payload via `fetchAttachmentData` and
/// hands it to `playFromBase64`.
///
/// Marks the view as failed right away when there is no daemon HTTP port or
/// the attachment has an empty id. Otherwise `isLoading` is raised for the
/// duration of the request and lowered again on both the success and the
/// error path; a fetch error is logged and flips `failed`.
private func fetchAndPlay() {
    guard let port = daemonHttpPort, !attachment.id.isEmpty else {
        failed = true
        return
    }
    let attachmentId = attachment.id

    isLoading = true
    Task {
        // Resolve the fetch to an optional first so the state update below
        // happens in a single main-actor hop for either outcome.
        let fetched: String?
        do {
            fetched = try await fetchAttachmentData(port: port, attachmentId: attachmentId)
        } catch {
            log.error("Failed to fetch attachment \(attachmentId): \(error.localizedDescription)")
            fetched = nil
        }

        await MainActor.run {
            isLoading = false
            if let fetched {
                playFromBase64(fetched)
            } else {
                failed = true
            }
        }
    }
}

private func openInExternalPlayer() {
guard let data = Data(base64Encoded: attachment.data) else { return }
let fileURL = safeTempURL()
try? data.write(to: fileURL)
NSWorkspace.shared.open(fileURL)
if attachment.isLazyLoad {
guard let port = daemonHttpPort, let attachmentId = attachment.id.isEmpty ? nil : attachment.id else { return }
isLoading = true
Task {
do {
let base64 = try await fetchAttachmentData(port: port, attachmentId: attachmentId)
guard let data = Data(base64Encoded: base64) else { return }
Comment on lines +163 to +165
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 isLoading never reset when base64 decode fails in openInExternalPlayer lazy-load path

When a lazy-loaded video attachment is opened in the external player and the fetched base64 data is invalid (i.e. Data(base64Encoded:) returns nil), the early return exits the Task closure without ever resetting isLoading to false. The UI gets permanently stuck showing the "Loading video..." spinner with no way for the user to recover.

Root Cause

In openInExternalPlayer(), the lazy-load path sets isLoading = true at line 161, then starts an async Task. Inside the do block, after successfully fetching the base64 string, line 165 has:

guard let data = Data(base64Encoded: base64) else { return }

If the base64 decode fails, return exits the entire Task closure. The isLoading = false at line 169 (success path) is skipped, and the catch block at line 172 is not triggered because no error was thrown.

Compare with fetchAndPlay() at InlineVideoAttachmentView.swift:134-155, which correctly handles all paths: isLoading = false is set in both the MainActor.run success block and the catch block, and the base64 decode failure is handled inside playFromBase64 which sets failed = true.

Impact: The user sees an infinite loading spinner and cannot retry or interact with the video attachment. The only way to recover is to navigate away and back.

Suggested change
do {
let base64 = try await fetchAttachmentData(port: port, attachmentId: attachmentId)
guard let data = Data(base64Encoded: base64) else { return }
let base64 = try await fetchAttachmentData(port: port, attachmentId: attachmentId)
guard let data = Data(base64Encoded: base64) else {
await MainActor.run { isLoading = false; failed = true }
return
}
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

let fileURL = safeTempURL()
try data.write(to: fileURL)
await MainActor.run {
isLoading = false
NSWorkspace.shared.open(fileURL)
}
} catch {
await MainActor.run { isLoading = false }
}
}
} else {
guard let data = Data(base64Encoded: attachment.data) else { return }
let fileURL = safeTempURL()
try? data.write(to: fileURL)
NSWorkspace.shared.open(fileURL)
}
}
}

/// Fetch attachment base64 data from the daemon HTTP endpoint.
private func fetchAttachmentData(port: Int, attachmentId: String) async throws -> String {
guard let token = readSessionToken() else {
throw URLError(.userAuthenticationRequired)
}
let url = URL(string: "http://localhost:\(port)/v1/attachments/\(attachmentId)")!
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Query attachment endpoint with the originating assistant scope

This new request path always calls /v1/attachments/:id, which dispatches as assistantId=self in the runtime router, but desktop IPC sessions persist assistant outputs under this.assistantId ?? 'local-assistant' (assistant/src/daemon/session.ts). Even if auth succeeds, the scoped lookup in handleGetAttachment will return 404 for those history attachments, so lazy-load playback still fails; the request needs the correct assistant scope (or an unscoped retrieval path).

Useful? React with 👍 / 👎.

var request = URLRequest(url: url)
request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
Comment on lines +187 to +192
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use HTTP bearer token when fetching lazy attachments

fetchAttachmentData reads ~/.vellum/session-token via readSessionToken() and sends it as the HTTP bearer token, but the runtime HTTP server authenticates against its separate HTTP token (RUNTIME_PROXY_BEARER_TOKEN/http-token) as set in assistant/src/daemon/lifecycle.ts and checked in assistant/src/runtime/http-server.ts. Because those tokens are generated independently, lazy attachment requests will typically get 401 Unauthorized, so large history videos cannot load.

Useful? React with 👍 / 👎.


let (data, response) = try await URLSession.shared.data(for: request)
guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
throw URLError(.badServerResponse)
}

struct AttachmentResponse: Decodable {
let data: String
}
let decoded = try JSONDecoder().decode(AttachmentResponse.self, from: data)
return decoded.data
}

/// NSViewRepresentable wrapper for AVPlayerView.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,8 @@ private struct ActiveChatViewWrapper: View {
allowedDomains: settingsStore.mediaEmbedVideoAllowlistDomains
),
isTemporaryChat: isTemporaryChat,
activeSubagents: viewModel.activeSubagents
activeSubagents: viewModel.activeSubagents,
daemonHttpPort: daemonClient.httpPort
)
}
}
Expand Down
16 changes: 13 additions & 3 deletions clients/shared/Features/Chat/ChatMessage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -709,14 +709,18 @@ public struct ChatAttachment: Identifiable {
public let id: String
public let filename: String
public let mimeType: String
/// Base64-encoded file data.
/// Base64-encoded file data. Empty when the attachment was too large to embed
/// in the history_response — fetch it lazily from the daemon HTTP attachment endpoint.
public let data: String
/// Pre-rendered thumbnail for image attachments (resized to 120px max dimension).
public let thumbnailData: Data?
/// Pre-computed length of `data` to avoid O(n) String.count during rendering.
/// Swift's String.count iterates the entire string to count grapheme clusters,
/// which is expensive for multi-MB base64 strings on every SwiftUI render pass.
public let dataLength: Int
/// Original file size in bytes. Non-nil when `data` is empty because the
/// attachment was too large to inline in the history response.
public let sizeBytes: Int?
/// Pre-decoded thumbnail image, cached to avoid decoding PNG data on every
/// SwiftUI render pass (each keystroke triggers a re-evaluation of the composer).
#if os(macOS)
Expand All @@ -727,24 +731,30 @@ public struct ChatAttachment: Identifiable {
#error("Unsupported platform")
#endif

/// `true` when `data` is empty and `sizeBytes` is present, meaning the payload
/// was left out of the message and has to be fetched on demand over HTTP.
public var isLazyLoad: Bool {
    guard sizeBytes != nil else { return false }
    return data.isEmpty
}

#if os(macOS)
public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, thumbnailImage: NSImage?) {
public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, sizeBytes: Int? = nil, thumbnailImage: NSImage?) {
self.id = id
self.filename = filename
self.mimeType = mimeType
self.data = data
self.thumbnailData = thumbnailData
self.dataLength = dataLength
self.sizeBytes = sizeBytes
self.thumbnailImage = thumbnailImage
}
#elseif os(iOS)
public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, thumbnailImage: UIImage?) {
public init(id: String, filename: String, mimeType: String, data: String, thumbnailData: Data?, dataLength: Int, sizeBytes: Int? = nil, thumbnailImage: UIImage?) {
self.id = id
self.filename = filename
self.mimeType = mimeType
self.data = data
self.thumbnailData = thumbnailData
self.dataLength = dataLength
self.sizeBytes = sizeBytes
self.thumbnailImage = thumbnailImage
}
#else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ extension ChatViewModel {
let id = ipc.id ?? UUID().uuidString
let base64 = ipc.data
let dataLength = base64.count
let sizeBytes: Int? = ipc.sizeBytes.flatMap { Int(exactly: $0) }

var thumbnailData: Data?
#if os(macOS)
Expand All @@ -285,7 +286,7 @@ extension ChatViewModel {
#error("Unsupported platform")
#endif

if ipc.mimeType.hasPrefix("image/"), let rawData = Data(base64Encoded: base64) {
if ipc.mimeType.hasPrefix("image/"), !base64.isEmpty, let rawData = Data(base64Encoded: base64) {
thumbnailData = Self.generateThumbnail(from: rawData, maxDimension: 120)
#if os(macOS)
thumbnailImage = thumbnailData.flatMap { NSImage(data: $0) }
Expand All @@ -301,6 +302,7 @@ extension ChatViewModel {
data: base64,
thumbnailData: thumbnailData,
dataLength: dataLength,
sizeBytes: sizeBytes,
thumbnailImage: thumbnailImage
)
}
Expand Down
2 changes: 2 additions & 0 deletions clients/shared/IPC/Generated/IPCContractGenerated.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,8 @@ public struct IPCUserMessageAttachment: Codable, Sendable {
public let mimeType: String
public let data: String
public let extractedText: String?
/// Original file size in bytes. Present when data was omitted from history_response to reduce payload size.
public let sizeBytes: Int?
}

public struct IPCUserMessageEcho: Codable, Sendable {
Expand Down
Loading