From bc411aacfd12942e2ffb6903d4b240e2b330f511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:30:02 +0200 Subject: [PATCH 01/45] Move basic Agent files --- Sources/LiveKit/Agent/Agent.swift | 49 +++++ Sources/LiveKit/Agent/Chat/Message.swift | 41 ++++ .../Agent/Chat/Receive/MessageReceiver.swift | 27 +++ .../TranscriptionDelegateReceiver.swift | 69 ++++++ .../Receive/TranscriptionStreamReceiver.swift | 173 +++++++++++++++ .../Agent/Chat/Send/MessageSender.swift | 27 +++ .../Agent/Chat/Send/TextMessageSender.swift | 56 +++++ .../Agent/Conversation+Environment.swift | 63 ++++++ Sources/LiveKit/Agent/Conversation.swift | 200 ++++++++++++++++++ Sources/LiveKit/Agent/LocalMedia.swift | 165 +++++++++++++++ .../LiveKit/Support/ObservableObject+.swift | 34 +++ 11 files changed, 904 insertions(+) create mode 100644 Sources/LiveKit/Agent/Agent.swift create mode 100644 Sources/LiveKit/Agent/Chat/Message.swift create mode 100644 Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift create mode 100644 Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift create mode 100644 Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift create mode 100644 Sources/LiveKit/Agent/Chat/Send/MessageSender.swift create mode 100644 Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift create mode 100644 Sources/LiveKit/Agent/Conversation+Environment.swift create mode 100644 Sources/LiveKit/Agent/Conversation.swift create mode 100644 Sources/LiveKit/Agent/LocalMedia.swift create mode 100644 Sources/LiveKit/Support/ObservableObject+.swift diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift new file mode 100644 index 000000000..70457f252 --- /dev/null +++ b/Sources/LiveKit/Agent/Agent.swift @@ -0,0 +1,49 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation +import LiveKit + +@MainActor +open class Agent: ObservableObject { + @Published public private(set) var state: AgentState = .idle + + @Published public private(set) var audioTrack: (any AudioTrack)? + @Published public private(set) var avatarVideoTrack: (any VideoTrack)? + + public let participant: Participant + + public init(participant: Participant) { + self.participant = participant + observe(participant) + } + + private func observe(_ participant: Participant) { + Task { [weak self] in + for await _ in participant.changes { + guard let self else { return } + + state = participant.agentState + updateTracks(of: participant) + } + } + } + + private func updateTracks(of participant: Participant) { + audioTrack = participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack + avatarVideoTrack = participant.avatarWorker?.firstCameraVideoTrack + } +} diff --git a/Sources/LiveKit/Agent/Chat/Message.swift b/Sources/LiveKit/Agent/Chat/Message.swift new file mode 100644 index 000000000..529728c1c --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Message.swift @@ -0,0 +1,41 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +/// A message received from the agent. +public struct ReceivedMessage: Identifiable, Equatable, Codable, Sendable { + public let id: String + public let timestamp: Date + public let content: Content + + public enum Content: Equatable, Codable, Sendable { + case agentTranscript(String) + case userTranscript(String) + case userInput(String) + } +} + +/// A message sent to the agent. +public struct SentMessage: Identifiable, Equatable, Codable, Sendable { + public let id: String + public let timestamp: Date + public let content: Content + + public enum Content: Equatable, Codable, Sendable { + case userInput(String) + } +} diff --git a/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift new file mode 100644 index 000000000..2344be30e --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift @@ -0,0 +1,27 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Foundation + +/// A protocol that defines a message receiver. +/// +/// A message receiver is responsible for creating a stream of messages from the agent. +/// It is used to receive messages from the agent and update the message feed. +/// +/// - SeeAlso: ``ReceivedMessage`` +public protocol MessageReceiver: Sendable { + func messages() async throws -> AsyncStream +} diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift new file mode 100644 index 000000000..824d29a03 --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift @@ -0,0 +1,69 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation +import LiveKit + +/// An actor that receives transcription messages from the room and yields them as messages. +/// +/// Room delegate methods are called multiple times for each message, with a stable message ID +/// that can be directly used for diffing. +/// +/// Example: +/// ``` +/// { id: "1", content: "Hello" } +/// { id: "1", content: "Hello world!" } +/// ``` +@available(*, deprecated, message: "Use TranscriptionStreamReceiver compatible with livekit-agents 1.0") +actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate { + private let room: Room + private var continuation: AsyncStream.Continuation?
+ + init(room: Room) { + self.room = room + room.add(delegate: self) + } + + deinit { + room.remove(delegate: self) + } + + /// Creates a new message stream for the transcription delegate receiver. + func messages() -> AsyncStream { + let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) + self.continuation = continuation + return stream + } + + nonisolated func room(_: Room, participant: Participant, trackPublication _: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment]) { + segments + .filter { !$0.text.isEmpty } + .forEach { segment in + let message = ReceivedMessage( + id: segment.id, + timestamp: segment.lastReceivedTime, + content: participant.isAgent ? .agentTranscript(segment.text) : .userTranscript(segment.text) + ) + Task { + await yield(message) + } + } + } + + private func yield(_ message: ReceivedMessage) { + continuation?.yield(message) + } +} diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift new file mode 100644 index 000000000..9ac2a1ce1 --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift @@ -0,0 +1,173 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation +import LiveKit + +/// An actor that converts raw text streams from the LiveKit `Room` into `Message` objects. 
+/// - Note: Streams are supported by `livekit-agents` >= 1.0.0. +/// - SeeAlso: ``TranscriptionDelegateReceiver`` +/// +/// For agent messages, a new text stream is emitted for each message, and the stream is closed when the message is finalized. +/// Each agent message is delivered in chunks that are accumulated and published into the message stream. +/// +/// For user messages, the full transcription is sent each time, but may be updated until finalized. +/// +/// The ID of the segment is stable and unique across the lifetime of the message. +/// This ID can be used directly for `Identifiable` conformance. +/// +/// Example text stream for agent messages: +/// ``` +/// { segment_id: "1", content: "Hello" } +/// { segment_id: "1", content: " world" } +/// { segment_id: "1", content: "!" } +/// { segment_id: "2", content: "Hello" } +/// { segment_id: "2", content: " Apple" } +/// { segment_id: "2", content: "!" } +/// ``` +/// +/// Example text stream for user messages: +/// ``` +/// { segment_id: "3", content: "Hello" } +/// { segment_id: "3", content: "Hello world!" } +/// { segment_id: "4", content: "Hello" } +/// { segment_id: "4", content: "Hello Apple!"
} +/// ``` +/// +/// Example output: +/// ``` +/// Message(id: "1", timestamp: 2025-01-01 12:00:00 +0000, content: .agentTranscript("Hello world!")) +/// Message(id: "2", timestamp: 2025-01-01 12:00:10 +0000, content: .agentTranscript("Hello Apple!")) +/// Message(id: "3", timestamp: 2025-01-01 12:00:20 +0000, content: .userTranscript("Hello world!")) +/// Message(id: "4", timestamp: 2025-01-01 12:00:30 +0000, content: .userTranscript("Hello Apple!")) +/// ``` +/// +actor TranscriptionStreamReceiver: MessageReceiver { + private struct PartialMessageID: Hashable { + let segmentID: String + let participantID: Participant.Identity + } + + private struct PartialMessage { + var content: String + let timestamp: Date + var streamID: String + + mutating func appendContent(_ newContent: String) { + content += newContent + } + + mutating func replaceContent(_ newContent: String, streamID: String) { + content = newContent + self.streamID = streamID + } + } + + private let transcriptionTopic = "lk.transcription" + private enum TranscriptionAttributes: String { + case final = "lk.transcription_final" + case segment = "lk.segment_id" + } + + private let room: Room + + private lazy var partialMessages: [PartialMessageID: PartialMessage] = [:] + + init(room: Room) { + self.room = room + } + + /// Creates a new message stream for the transcription topic.
+ func messages() async throws -> AsyncStream { + let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) + + try await room.registerTextStreamHandler(for: transcriptionTopic) { [weak self] reader, participantIdentity in + guard let self else { return } + for try await message in reader where !message.isEmpty { + await continuation.yield(processIncoming(partialMessage: message, reader: reader, participantIdentity: participantIdentity)) + } + } + + continuation.onTermination = { [weak self] _ in + Task { + guard let self else { return } + await self.room.unregisterTextStreamHandler(for: self.transcriptionTopic) + } + } + + return stream + } + + /// Aggregates the incoming text into a message, storing the partial content in the `partialMessages` dictionary. + /// - Note: When the message is finalized, or a new message is started, the dictionary is purged to limit memory usage. + private func processIncoming(partialMessage message: String, reader: TextStreamReader, participantIdentity: Participant.Identity) -> ReceivedMessage { + let segmentID = reader.info.attributes[TranscriptionAttributes.segment.rawValue] ?? 
reader.info.id + let participantID = participantIdentity + let partialID = PartialMessageID(segmentID: segmentID, participantID: participantID) + + let currentStreamID = reader.info.id + + let timestamp: Date + let updatedContent: String + + if var existingMessage = partialMessages[partialID] { + // Update existing message + if existingMessage.streamID == currentStreamID { + // Same stream, append content + existingMessage.appendContent(message) + } else { + // Different stream for same segment, replace content + existingMessage.replaceContent(message, streamID: currentStreamID) + } + updatedContent = existingMessage.content + timestamp = existingMessage.timestamp + partialMessages[partialID] = existingMessage + } else { + // This is a new message + updatedContent = message + timestamp = reader.info.timestamp + partialMessages[partialID] = PartialMessage( + content: updatedContent, + timestamp: timestamp, + streamID: currentStreamID + ) + cleanupPreviousTurn(participantIdentity, exceptSegmentID: segmentID) + } + + let isFinal = reader.info.attributes[TranscriptionAttributes.final.rawValue] == "true" + if isFinal { + partialMessages[partialID] = nil + } + + let newOrUpdatedMessage = ReceivedMessage( + id: segmentID, + timestamp: timestamp, + content: participantIdentity == room.localParticipant.identity ? 
.userTranscript(updatedContent) : .agentTranscript(updatedContent) + ) + + return newOrUpdatedMessage + } + + private func cleanupPreviousTurn(_ participantID: Participant.Identity, exceptSegmentID: String) { + let keysToRemove = partialMessages.keys.filter { + $0.participantID == participantID && $0.segmentID != exceptSegmentID + } + + for key in keysToRemove { + partialMessages[key] = nil + } + } +} diff --git a/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift b/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift new file mode 100644 index 000000000..fe78232c0 --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift @@ -0,0 +1,27 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +/// A protocol that defines a message sender. +/// +/// A message sender is responsible for sending messages to the agent. +/// It is used to send messages to the agent and update the message feed. 
+/// +/// - SeeAlso: ``SentMessage`` +public protocol MessageSender: Sendable { + func send(_ message: SentMessage) async throws +} diff --git a/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift new file mode 100644 index 000000000..af4bc6fb1 --- /dev/null +++ b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift @@ -0,0 +1,56 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation +import LiveKit + +/// An actor that sends local messages to the agent. +/// Currently, it only supports sending text messages. +/// +/// It also serves as the loopback for the local messages, +/// so that they can be displayed in the message feed +/// without relying on the agent-side transcription. +actor TextMessageSender: MessageSender, MessageReceiver { + private let room: Room + private let topic: String + + private var messageContinuation: AsyncStream.Continuation? 
+ + init(room: Room, topic: String = "lk.chat") { + self.room = room + self.topic = topic + } + + func send(_ message: SentMessage) async throws { + guard case let .userInput(text) = message.content else { return } + + try await room.localParticipant.sendText(text, for: topic) + + let loopbackMessage = ReceivedMessage( + id: message.id, + timestamp: message.timestamp, + content: .userInput(text) + ) + + messageContinuation?.yield(loopbackMessage) + } + + func messages() async throws -> AsyncStream { + let (stream, continuation) = AsyncStream.makeStream() + messageContinuation = continuation + return stream + } +} diff --git a/Sources/LiveKit/Agent/Conversation+Environment.swift b/Sources/LiveKit/Agent/Conversation+Environment.swift new file mode 100644 index 000000000..e2feba7c3 --- /dev/null +++ b/Sources/LiveKit/Agent/Conversation+Environment.swift @@ -0,0 +1,63 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import SwiftUI + +extension EnvironmentValues { + @Entry var agentName: String? 
= nil +} + +@MainActor +@propertyWrapper +struct LKConversation: DynamicProperty { + @EnvironmentObject private var conversation: Conversation + + var wrappedValue: Conversation { + conversation + } +} + +@MainActor +@propertyWrapper +struct LKLocalMedia: DynamicProperty { + @EnvironmentObject private var localMedia: LocalMedia + + var wrappedValue: LocalMedia { + localMedia + } +} + +@MainActor +@propertyWrapper +struct LKAgent: DynamicProperty { + @EnvironmentObject private var conversation: Conversation + @Environment(\.agentName) private var environmentName + + let agentName: String? + + init(named agentName: String? = nil) { + self.agentName = agentName + } + + var wrappedValue: Agent? { + if let agentName { + return conversation.agent(named: agentName) + } else if let environmentName { + return conversation.agent(named: environmentName) + } + return conversation.agents.values.first + } +} diff --git a/Sources/LiveKit/Agent/Conversation.swift b/Sources/LiveKit/Agent/Conversation.swift new file mode 100644 index 000000000..8edef675b --- /dev/null +++ b/Sources/LiveKit/Agent/Conversation.swift @@ -0,0 +1,200 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Collections +import Foundation +import LiveKit + +@MainActor +open class Conversation: ObservableObject { + // MARK: - Error + + public enum Error: LocalizedError { + case agentNotConnected + case failedToConnect(Swift.Error) + case failedToSend(Swift.Error) + + public var errorDescription: String? { + "TODO" + } + } + + // MARK: - State + + @Published public private(set) var error: Error? + + @Published public private(set) var connectionState: ConnectionState = .disconnected + @Published public private(set) var isListening = false + public var isReady: Bool { + switch connectionState { + case .disconnected where isListening, + .connecting where isListening, + .connected, + .reconnecting: + true + default: + false + } + } + + @Published public private(set) var agents: [Participant.Identity: Agent] = [:] + public var hasAgents: Bool { !agents.isEmpty } + + @Published public private(set) var messages: OrderedDictionary = [:] + + // MARK: - Dependencies + + public let room: Room + + private let credentials: any CredentialsProvider + private let senders: [any MessageSender] + private let receivers: [any MessageReceiver] + + // MARK: - Internal state + + private var waitForAgentTask: Task? + + // MARK: - Init + + public init(credentials: CredentialsProvider, room: Room = .init(), agentName: String? = nil, senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { + self.credentials = credentials + self.room = room + + let textMessageSender = TextMessageSender(room: room) + let senders = senders ?? [textMessageSender] + let receivers = receivers ?? [textMessageSender, TranscriptionStreamReceiver(room: room)] + + self.senders = senders + self.receivers = receivers + + observe(room: room, agentName: agentName) + observe(receivers: receivers) + } + + private func observe(room: Room, agentName _: String?) 
{ + Task { [weak self] in + for await _ in room.changes { + guard let self else { return } + + connectionState = room.connectionState + updateAgents(in: room) + } + } + } + + private func updateAgents(in room: Room) { + let agentParticipants = room.agentParticipants + + var newAgents: [Participant.Identity: Agent] = [:] + + for (identity, participant) in agentParticipants { + if let existingAgent = agents[identity] { + newAgents[identity] = existingAgent + } else { + let newAgent = Agent(participant: participant) + newAgents[identity] = newAgent + } + } + + agents = newAgents + } + + private func observe(receivers: [any MessageReceiver]) { + for receiver in receivers { + Task { [weak self] in + for await message in try await receiver.messages() { + guard let self else { return } + messages.updateValue(message, forKey: message.id) + } + } + } + } + + // MARK: - Agents + + public func agent(named name: String) -> Agent? { + agents.values.first { $0.participant.attributes["lk.agent_name"] == name || $0.participant.identity?.stringValue == name } + } + + public subscript(name: String) -> Agent? { + agent(named: name) + } + + // MARK: - Lifecycle + + public func start(preConnectAudio: Bool = true, waitForAgent: TimeInterval = 20, options: ConnectOptions? = nil, roomOptions: RoomOptions? 
= nil) async { + guard connectionState == .disconnected else { return } + + error = nil + waitForAgentTask?.cancel() + + defer { + waitForAgentTask = Task { + try await Task.sleep(for: .seconds(waitForAgent)) + try Task.checkCancellation() + if connectionState == .connected, agents.isEmpty { + await end() + self.error = .agentNotConnected + } + } + } + + do { + if preConnectAudio { + try await room.withPreConnectAudio(timeout: waitForAgent) { + await MainActor.run { self.isListening = true } + try await self.room.connect(credentialsProvider: self.credentials, connectOptions: options, roomOptions: roomOptions) + await MainActor.run { self.isListening = false } + } + } else { + try await room.connect(credentialsProvider: credentials, connectOptions: options, roomOptions: roomOptions) + } + } catch { + self.error = .failedToConnect(error) + } + } + + public func end() async { + await room.disconnect() + } + + public func resetError() { + error = nil + } + + // MARK: - Messages + + @discardableResult + public func send(text: String) async -> SentMessage { + let message = SentMessage(id: UUID().uuidString, timestamp: Date(), content: .userInput(text)) + do { + for sender in senders { + try await sender.send(message) + } + } catch { + self.error = .failedToSend(error) + } + return message + } + + public func getMessageHistory() -> [ReceivedMessage] { + messages.values.elements + } + + public func restoreMessageHistory(_ messages: [ReceivedMessage]) { + self.messages = .init(uniqueKeysWithValues: messages.sorted(by: { $0.timestamp < $1.timestamp }).map { ($0.id, $0) }) + } +} diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift new file mode 100644 index 000000000..201395c86 --- /dev/null +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -0,0 +1,165 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@preconcurrency import AVFoundation +import LiveKit + +@MainActor +open class LocalMedia: ObservableObject { + // MARK: - Error + + public enum Error: LocalizedError { + case mediaDevice(Swift.Error) + } + + // MARK: - Devices + + @Published public private(set) var error: Error? + + @Published public private(set) var microphoneTrack: (any AudioTrack)? + @Published public private(set) var cameraTrack: (any VideoTrack)? + @Published public private(set) var screenShareTrack: (any VideoTrack)? + + public var isMicrophoneEnabled: Bool { microphoneTrack != nil } + public var isCameraEnabled: Bool { cameraTrack != nil } + public var isScreenShareEnabled: Bool { screenShareTrack != nil } + + @Published public private(set) var audioDevices: [AudioDevice] = AudioManager.shared.inputDevices + @Published public private(set) var selectedAudioDeviceID: String = AudioManager.shared.inputDevice.deviceId + + @Published public private(set) var videoDevices: [AVCaptureDevice] = [] + @Published public private(set) var selectedVideoDeviceID: String? 
+ + @Published public private(set) var canSwitchCamera = false + + // MARK: - Dependencies + + private var room: Room + + // MARK: - Initialization + + public init(room: Room) { + self.room = room + + observe(room: room) + observeDevices() + } + + public convenience init(conversation: Conversation) { + self.init(room: conversation.room) + } + + private func observe(room: Room) { + Task { [weak self] in + for await _ in room.changes { + guard let self else { return } + + microphoneTrack = room.localParticipant.firstAudioTrack + cameraTrack = room.localParticipant.firstCameraVideoTrack + screenShareTrack = room.localParticipant.firstScreenShareVideoTrack + } + } + } + + private func observeDevices() { + try? AudioManager.shared.set(microphoneMuteMode: .inputMixer) // don't play mute sound effect + Task { + try await AudioManager.shared.setRecordingAlwaysPreparedMode(true) + } + + AudioManager.shared.onDeviceUpdate = { [weak self] _ in + Task { @MainActor in + self?.audioDevices = AudioManager.shared.inputDevices + self?.selectedAudioDeviceID = AudioManager.shared.defaultInputDevice.deviceId + } + } + + Task { + canSwitchCamera = try await CameraCapturer.canSwitchPosition() + videoDevices = try await CameraCapturer.captureDevices() + selectedVideoDeviceID = videoDevices.first?.uniqueID + } + } + + deinit { + AudioManager.shared.onDeviceUpdate = nil + } + + // MARK: - Toggle + + public func toggleMicrophone() async { + do { + try await room.localParticipant.setMicrophone(enabled: !isMicrophoneEnabled) + } catch { + self.error = .mediaDevice(error) + } + } + + public func toggleCamera(disableScreenShare: Bool = false) async { + let enable = !isCameraEnabled + do { + if enable, disableScreenShare, isScreenShareEnabled { + try await room.localParticipant.setScreenShare(enabled: false) + } + + let device = try await CameraCapturer.captureDevices().first(where: { $0.uniqueID == selectedVideoDeviceID }) + try await room.localParticipant.setCamera(enabled: enable, 
captureOptions: CameraCaptureOptions(device: device)) + } catch { + self.error = .mediaDevice(error) + } + } + + public func toggleScreenShare(disableCamera: Bool = false) async { + let enable = !isScreenShareEnabled + do { + if enable, disableCamera, isCameraEnabled { + try await room.localParticipant.setCamera(enabled: false) + } + try await room.localParticipant.setScreenShare(enabled: enable) + } catch { + self.error = .mediaDevice(error) + } + } + + // MARK: - Select + + public func select(audioDevice: AudioDevice) { + selectedAudioDeviceID = audioDevice.deviceId + + let device = AudioManager.shared.inputDevices.first(where: { $0.deviceId == selectedAudioDeviceID }) ?? AudioManager.shared.defaultInputDevice + AudioManager.shared.inputDevice = device + } + + public func select(videoDevice: AVCaptureDevice) async { + selectedVideoDeviceID = videoDevice.uniqueID + + guard let cameraCapturer = getCameraCapturer() else { return } + let captureOptions = CameraCaptureOptions(device: videoDevice) + _ = try? await cameraCapturer.set(options: captureOptions) + } + + public func switchCamera() async { + guard let cameraCapturer = getCameraCapturer() else { return } + _ = try? await cameraCapturer.switchCameraPosition() + } + + // MARK: - Private + + private func getCameraCapturer() -> CameraCapturer? { + guard let cameraTrack = room.localParticipant.firstCameraVideoTrack as? LocalVideoTrack else { return nil } + return cameraTrack.capturer as? CameraCapturer + } +} diff --git a/Sources/LiveKit/Support/ObservableObject+.swift b/Sources/LiveKit/Support/ObservableObject+.swift new file mode 100644 index 000000000..863d345e0 --- /dev/null +++ b/Sources/LiveKit/Support/ObservableObject+.swift @@ -0,0 +1,34 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Combine + +@available(iOS 15, *) +extension ObservableObject { + typealias BufferedObjectWillChangePublisher = Publishers.Buffer + + // This is necessary due to ObservableObjectPublisher not respecting the demand. + // See: https://forums.swift.org/t/asyncpublisher-causes-crash-in-rather-simple-situation + private var bufferedObjectWillChange: BufferedObjectWillChangePublisher { + objectWillChange + .buffer(size: 1, prefetch: .byRequest, whenFull: .dropOldest) + } + + /// A publisher that emits the `objectWillChange` events. + var changes: AsyncPublisher { + bufferedObjectWillChange.values + } +} From 43861e8c02cf7cc2b80ee6036ab52fad5f79eba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 18 Sep 2025 13:40:13 +0200 Subject: [PATCH 02/45] Fix inconsistencies --- Sources/LiveKit/Agent/Agent.swift | 9 +++-- Sources/LiveKit/Agent/AgentState+.swift | 21 ------------ .../TranscriptionDelegateReceiver.swift | 1 - .../Receive/TranscriptionStreamReceiver.swift | 1 - .../Agent/Chat/Send/TextMessageSender.swift | 1 - .../Agent/Conversation+Environment.swift | 33 ++++++++++++++----- Sources/LiveKit/Agent/Conversation.swift | 7 ++-- Sources/LiveKit/Agent/LocalMedia.swift | 3 +- .../LiveKit/Support/ObservableObject+.swift | 31 +++++++++-------- Sources/LiveKit/Track/VideoTrack.swift | 8 +++++ 10 files changed, 61 insertions(+), 54 deletions(-) delete mode 100644 Sources/LiveKit/Agent/AgentState+.swift diff --git a/Sources/LiveKit/Agent/Agent.swift 
b/Sources/LiveKit/Agent/Agent.swift index 70457f252..f2a23bf0b 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -15,7 +15,6 @@ */ import Foundation -import LiveKit @MainActor open class Agent: ObservableObject { @@ -33,7 +32,7 @@ open class Agent: ObservableObject { private func observe(_ participant: Participant) { Task { [weak self] in - for await _ in participant.changes { + for try await _ in participant.changes { guard let self else { return } state = participant.agentState @@ -47,3 +46,9 @@ open class Agent: ObservableObject { avatarVideoTrack = participant.avatarWorker?.firstCameraVideoTrack } } + +extension AgentState: CustomStringConvertible { + public var description: String { + rawValue.capitalized + } +} diff --git a/Sources/LiveKit/Agent/AgentState+.swift b/Sources/LiveKit/Agent/AgentState+.swift deleted file mode 100644 index 9bb45b096..000000000 --- a/Sources/LiveKit/Agent/AgentState+.swift +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright 2025 LiveKit - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -extension AgentState: CustomStringConvertible { - public var description: String { - rawValue.capitalized - } -} diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift index 824d29a03..43c8bfe1a 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift @@ -15,7 +15,6 @@ */ import Foundation -import LiveKit /// An actor that receives transcription messages from the room and yields them as messages. /// diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift index 9ac2a1ce1..40a541ec5 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift @@ -15,7 +15,6 @@ */ import Foundation -import LiveKit /// An actor that converts raw text streams from the LiveKit `Room` into `Message` objects. /// - Note: Streams are supported by `livekit-agents` >= 1.0.0. diff --git a/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift index af4bc6fb1..3fcfc87e0 100644 --- a/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift +++ b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift @@ -15,7 +15,6 @@ */ import Foundation -import LiveKit /// An actor that sends local messages to the agent. /// Currently, it only supports sending text messages. 
diff --git a/Sources/LiveKit/Agent/Conversation+Environment.swift b/Sources/LiveKit/Agent/Conversation+Environment.swift index e2feba7c3..23ddb4f53 100644 --- a/Sources/LiveKit/Agent/Conversation+Environment.swift +++ b/Sources/LiveKit/Agent/Conversation+Environment.swift @@ -16,43 +16,60 @@ import SwiftUI -extension EnvironmentValues { +#if swift(>=6.0) +public extension EnvironmentValues { @Entry var agentName: String? = nil } +#else +public struct AgentNameKey: EnvironmentKey { + public static let defaultValue: String? = nil +} + +public extension EnvironmentValues { + var agentName: String? { + get { self[AgentNameKey.self] } + set { self[AgentNameKey.self] = newValue } + } +} +#endif @MainActor @propertyWrapper -struct LKConversation: DynamicProperty { +public struct LKConversation: DynamicProperty { @EnvironmentObject private var conversation: Conversation - var wrappedValue: Conversation { + public init() {} + + public var wrappedValue: Conversation { conversation } } @MainActor @propertyWrapper -struct LKLocalMedia: DynamicProperty { +public struct LKLocalMedia: DynamicProperty { @EnvironmentObject private var localMedia: LocalMedia - var wrappedValue: LocalMedia { + public init() {} + + public var wrappedValue: LocalMedia { localMedia } } @MainActor @propertyWrapper -struct LKAgent: DynamicProperty { +public struct LKAgent: DynamicProperty { @EnvironmentObject private var conversation: Conversation @Environment(\.agentName) private var environmentName let agentName: String? - init(named agentName: String? = nil) { + public init(named agentName: String? = nil) { self.agentName = agentName } - var wrappedValue: Agent? { + public var wrappedValue: Agent? 
{ if let agentName { return conversation.agent(named: agentName) } else if let environmentName { diff --git a/Sources/LiveKit/Agent/Conversation.swift b/Sources/LiveKit/Agent/Conversation.swift index 8edef675b..4cb2b452f 100644 --- a/Sources/LiveKit/Agent/Conversation.swift +++ b/Sources/LiveKit/Agent/Conversation.swift @@ -14,9 +14,8 @@ * limitations under the License. */ -import Collections import Foundation -import LiveKit +import OrderedCollections @MainActor open class Conversation: ObservableObject { @@ -86,7 +85,7 @@ open class Conversation: ObservableObject { private func observe(room: Room, agentName _: String?) { Task { [weak self] in - for await _ in room.changes { + for try await _ in room.changes { guard let self else { return } connectionState = room.connectionState @@ -143,7 +142,7 @@ open class Conversation: ObservableObject { defer { waitForAgentTask = Task { - try await Task.sleep(for: .seconds(waitForAgent)) + try await Task.sleep(nanoseconds: UInt64(TimeInterval(NSEC_PER_SEC) * waitForAgent)) try Task.checkCancellation() if connectionState == .connected, agents.isEmpty { await end() diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift index 201395c86..b3f4c6de9 100644 --- a/Sources/LiveKit/Agent/LocalMedia.swift +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -15,7 +15,6 @@ */ @preconcurrency import AVFoundation -import LiveKit @MainActor open class LocalMedia: ObservableObject { @@ -64,7 +63,7 @@ open class LocalMedia: ObservableObject { private func observe(room: Room) { Task { [weak self] in - for await _ in room.changes { + for try await _ in room.changes { guard let self else { return } microphoneTrack = room.localParticipant.firstAudioTrack diff --git a/Sources/LiveKit/Support/ObservableObject+.swift b/Sources/LiveKit/Support/ObservableObject+.swift index 863d345e0..68a3d7a18 100644 --- a/Sources/LiveKit/Support/ObservableObject+.swift +++ b/Sources/LiveKit/Support/ObservableObject+.swift @@ -14,21 
+14,24 @@ * limitations under the License. */ -import Combine +@preconcurrency import Combine -@available(iOS 15, *) extension ObservableObject { - typealias BufferedObjectWillChangePublisher = Publishers.Buffer - - // This is necessary due to ObservableObjectPublisher not respecting the demand. - // See: https://forums.swift.org/t/asyncpublisher-causes-crash-in-rather-simple-situation - private var bufferedObjectWillChange: BufferedObjectWillChangePublisher { - objectWillChange - .buffer(size: 1, prefetch: .byRequest, whenFull: .dropOldest) - } - - /// A publisher that emits the `objectWillChange` events. - var changes: AsyncPublisher { - bufferedObjectWillChange.values + /// An async sequence that emits the `objectWillChange` events. + var changes: any AsyncSequence { + if #available(macOS 12.0, iOS 15.0, tvOS 15.0, *) { + // This is necessary due to ObservableObjectPublisher not respecting the demand. + // See: https://forums.swift.org/t/asyncpublisher-causes-crash-in-rather-simple-situation + objectWillChange.buffer(size: 1, prefetch: .byRequest, whenFull: .dropOldest).values + } else { + AsyncStream { continuation in + let cancellable = objectWillChange.sink { _ in + continuation.yield() + } + continuation.onTermination = { _ in + cancellable.cancel() + } + } + } } } diff --git a/Sources/LiveKit/Track/VideoTrack.swift b/Sources/LiveKit/Track/VideoTrack.swift index 65ff33f33..f35e3c50d 100644 --- a/Sources/LiveKit/Track/VideoTrack.swift +++ b/Sources/LiveKit/Track/VideoTrack.swift @@ -71,3 +71,11 @@ extension VideoTrackProtocol where Self: Track { return missingCodecs } } + +public extension VideoTrack { + /// The aspect ratio of the video track or 1 if the dimensions are not available. 
+ var aspectRatio: CGFloat { + guard let dimensions else { return 1 } + return CGFloat(dimensions.width) / CGFloat(dimensions.height) + } +} From 314b1c125576eb24f4b0e7657988e21c237babbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:31:33 +0200 Subject: [PATCH 03/45] Media state from participant --- Sources/LiveKit/Agent/LocalMedia.swift | 44 +++++++++++++++----------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift index b3f4c6de9..e0d7775ab 100644 --- a/Sources/LiveKit/Agent/LocalMedia.swift +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -32,9 +32,9 @@ open class LocalMedia: ObservableObject { @Published public private(set) var cameraTrack: (any VideoTrack)? @Published public private(set) var screenShareTrack: (any VideoTrack)? - public var isMicrophoneEnabled: Bool { microphoneTrack != nil } - public var isCameraEnabled: Bool { cameraTrack != nil } - public var isScreenShareEnabled: Bool { screenShareTrack != nil } + @Published public private(set) var isMicrophoneEnabled: Bool = false + @Published public private(set) var isCameraEnabled: Bool = false + @Published public private(set) var isScreenShareEnabled: Bool = false @Published public private(set) var audioDevices: [AudioDevice] = AudioManager.shared.inputDevices @Published public private(set) var selectedAudioDeviceID: String = AudioManager.shared.inputDevice.deviceId @@ -46,29 +46,37 @@ open class LocalMedia: ObservableObject { // MARK: - Dependencies - private var room: Room + private var localParticipant: LocalParticipant // MARK: - Initialization - public init(room: Room) { - self.room = room + public init(localParticipant: LocalParticipant) { + self.localParticipant = localParticipant - observe(room: room) + observe(localParticipant) observeDevices() } + public convenience init(room: Room) { + 
self.init(localParticipant: room.localParticipant) + } + public convenience init(conversation: Conversation) { self.init(room: conversation.room) } - private func observe(room: Room) { + private func observe(_ localParticipant: LocalParticipant) { Task { [weak self] in - for try await _ in room.changes { + for try await _ in localParticipant.changes { guard let self else { return } - microphoneTrack = room.localParticipant.firstAudioTrack - cameraTrack = room.localParticipant.firstCameraVideoTrack - screenShareTrack = room.localParticipant.firstScreenShareVideoTrack + microphoneTrack = localParticipant.firstAudioTrack + cameraTrack = localParticipant.firstCameraVideoTrack + screenShareTrack = localParticipant.firstScreenShareVideoTrack + + isMicrophoneEnabled = localParticipant.isMicrophoneEnabled() + isCameraEnabled = localParticipant.isCameraEnabled() + isScreenShareEnabled = localParticipant.isScreenShareEnabled() } } } @@ -101,7 +109,7 @@ open class LocalMedia: ObservableObject { public func toggleMicrophone() async { do { - try await room.localParticipant.setMicrophone(enabled: !isMicrophoneEnabled) + try await localParticipant.setMicrophone(enabled: !isMicrophoneEnabled) } catch { self.error = .mediaDevice(error) } @@ -111,11 +119,11 @@ open class LocalMedia: ObservableObject { let enable = !isCameraEnabled do { if enable, disableScreenShare, isScreenShareEnabled { - try await room.localParticipant.setScreenShare(enabled: false) + try await localParticipant.setScreenShare(enabled: false) } let device = try await CameraCapturer.captureDevices().first(where: { $0.uniqueID == selectedVideoDeviceID }) - try await room.localParticipant.setCamera(enabled: enable, captureOptions: CameraCaptureOptions(device: device)) + try await localParticipant.setCamera(enabled: enable, captureOptions: CameraCaptureOptions(device: device)) } catch { self.error = .mediaDevice(error) } @@ -125,9 +133,9 @@ open class LocalMedia: ObservableObject { let enable = !isScreenShareEnabled 
do { if enable, disableCamera, isCameraEnabled { - try await room.localParticipant.setCamera(enabled: false) + try await localParticipant.setCamera(enabled: false) } - try await room.localParticipant.setScreenShare(enabled: enable) + try await localParticipant.setScreenShare(enabled: enable) } catch { self.error = .mediaDevice(error) } @@ -158,7 +166,7 @@ open class LocalMedia: ObservableObject { // MARK: - Private private func getCameraCapturer() -> CameraCapturer? { - guard let cameraTrack = room.localParticipant.firstCameraVideoTrack as? LocalVideoTrack else { return nil } + guard let cameraTrack = localParticipant.firstCameraVideoTrack as? LocalVideoTrack else { return nil } return cameraTrack.capturer as? CameraCapturer } } From 93f30811d8e1c9e5a5207b8d148dd4694997f61c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:37:08 +0200 Subject: [PATCH 04/45] Naming --- Sources/LiveKit/Agent/Conversation+Environment.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/LiveKit/Agent/Conversation+Environment.swift b/Sources/LiveKit/Agent/Conversation+Environment.swift index 23ddb4f53..d0f64cde7 100644 --- a/Sources/LiveKit/Agent/Conversation+Environment.swift +++ b/Sources/LiveKit/Agent/Conversation+Environment.swift @@ -35,7 +35,7 @@ public extension EnvironmentValues { @MainActor @propertyWrapper -public struct LKConversation: DynamicProperty { +public struct LiveKitConversation: DynamicProperty { @EnvironmentObject private var conversation: Conversation public init() {} @@ -47,7 +47,7 @@ public struct LKConversation: DynamicProperty { @MainActor @propertyWrapper -public struct LKLocalMedia: DynamicProperty { +public struct LiveKitLocalMedia: DynamicProperty { @EnvironmentObject private var localMedia: LocalMedia public init() {} @@ -59,13 +59,13 @@ public struct LKLocalMedia: DynamicProperty { @MainActor @propertyWrapper -public struct 
LKAgent: DynamicProperty { +public struct LiveKitAgent: DynamicProperty { @EnvironmentObject private var conversation: Conversation @Environment(\.agentName) private var environmentName let agentName: String? - public init(named agentName: String? = nil) { + public init(_ agentName: String? = nil) { self.agentName = agentName } From 319798f67dfb5e28b6d718815bd8b585965851bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 23 Sep 2025 10:21:41 +0200 Subject: [PATCH 05/45] Attributes gen --- .../Receive/TranscriptionStreamReceiver.swift | 25 ++++++++-------- .../Types/Attributes/AttributeTypings.swift | 29 +++++++++++++++++++ 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift index 40a541ec5..758ce5043 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift @@ -54,7 +54,7 @@ import Foundation /// Message(id: "4", timestamp: 2025-01-01 12:00:30 +0000, content: .userTranscript("Hello Apple!")) /// ``` /// -actor TranscriptionStreamReceiver: MessageReceiver { +actor TranscriptionStreamReceiver: MessageReceiver, Loggable { private struct PartialMessageID: Hashable { let segmentID: String let participantID: Participant.Identity @@ -75,25 +75,21 @@ actor TranscriptionStreamReceiver: MessageReceiver { } } - private let transcriptionTopic = "lk.transcription" - private enum TranscriptionAttributes: String { - case final = "lk.transcription_final" - case segment = "lk.segment_id" - } - private let room: Room + private let topic: String private lazy var partialMessages: [PartialMessageID: PartialMessage] = [:] - init(room: Room) { + init(room: Room, topic: String = "lk.transcription") { self.room = room + self.topic = topic } /// Creates a new message 
stream for the chat topic. func messages() async throws -> AsyncStream { let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) - try await room.registerTextStreamHandler(for: transcriptionTopic) { [weak self] reader, participantIdentity in + try await room.registerTextStreamHandler(for: topic) { [weak self] reader, participantIdentity in guard let self else { return } for try await message in reader where !message.isEmpty { await continuation.yield(processIncoming(partialMessage: message, reader: reader, participantIdentity: participantIdentity)) @@ -103,7 +99,7 @@ actor TranscriptionStreamReceiver: MessageReceiver { continuation.onTermination = { [weak self] _ in Task { guard let self else { return } - await self.room.unregisterTextStreamHandler(for: self.transcriptionTopic) + await self.room.unregisterTextStreamHandler(for: self.topic) } } @@ -113,7 +109,12 @@ actor TranscriptionStreamReceiver: MessageReceiver { /// Aggregates the incoming text into a message, storing the partial content in the `partialMessages` dictionary. /// - Note: When the message is finalized, or a new message is started, the dictionary is purged to limit memory usage. private func processIncoming(partialMessage message: String, reader: TextStreamReader, participantIdentity: Participant.Identity) -> ReceivedMessage { - let segmentID = reader.info.attributes[TranscriptionAttributes.segment.rawValue] ?? reader.info.id + let attributes = reader.info.attributes.mapped(to: TranscriptionAttributes.self) + if attributes == nil { + log("Unable to read message attributes from \(reader.info.attributes)", .error) + } + + let segmentID = attributes?.lkSegmentID ?? 
reader.info.id let participantID = participantIdentity let partialID = PartialMessageID(segmentID: segmentID, participantID: participantID) @@ -146,7 +147,7 @@ actor TranscriptionStreamReceiver: MessageReceiver { cleanupPreviousTurn(participantIdentity, exceptSegmentID: segmentID) } - let isFinal = reader.info.attributes[TranscriptionAttributes.final.rawValue] == "true" + let isFinal = attributes?.lkTranscriptionFinal ?? false if isFinal { partialMessages[partialID] = nil } diff --git a/Sources/LiveKit/Types/Attributes/AttributeTypings.swift b/Sources/LiveKit/Types/Attributes/AttributeTypings.swift index 35dbc8f0b..9fc56e609 100644 --- a/Sources/LiveKit/Types/Attributes/AttributeTypings.swift +++ b/Sources/LiveKit/Types/Attributes/AttributeTypings.swift @@ -20,6 +20,35 @@ import Foundation extension AgentAttributes: Hashable {} extension AgentAttributes: Equatable {} +// Bool as String encoding +extension TranscriptionAttributes { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + lkSegmentID = try container.decodeIfPresent(String.self, forKey: .lkSegmentID) + lkTranscribedTrackID = try container.decodeIfPresent(String.self, forKey: .lkTranscribedTrackID) + + // Decode as Bool first, fallback to String + if let boolValue = try? container.decodeIfPresent(Bool.self, forKey: .lkTranscriptionFinal) { + lkTranscriptionFinal = boolValue + } else if let stringValue = try? 
container.decodeIfPresent(String.self, forKey: .lkTranscriptionFinal) { + lkTranscriptionFinal = (stringValue as NSString).boolValue + } else { + lkTranscriptionFinal = nil + } + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encodeIfPresent(lkSegmentID, forKey: .lkSegmentID) + try container.encodeIfPresent(lkTranscribedTrackID, forKey: .lkTranscribedTrackID) + + // Always encode Bool as a string if it exists + if let boolValue = lkTranscriptionFinal { + try container.encode(boolValue ? "true" : "false", forKey: .lkTranscriptionFinal) + } + } +} + // MARK: - AgentAttributes struct AgentAttributes: Codable, Sendable { From d4a496e74630a85e498668deac0051d08ea1dbe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:58:53 +0200 Subject: [PATCH 06/45] Transcription tests --- .../Agent/TranscriptionTests.swift | 196 ++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 Tests/LiveKitTests/Agent/TranscriptionTests.swift diff --git a/Tests/LiveKitTests/Agent/TranscriptionTests.swift b/Tests/LiveKitTests/Agent/TranscriptionTests.swift new file mode 100644 index 000000000..921caccc9 --- /dev/null +++ b/Tests/LiveKitTests/Agent/TranscriptionTests.swift @@ -0,0 +1,196 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@testable import LiveKit +import OrderedCollections +import XCTest + +actor MessageCollector { + private var updates: [ReceivedMessage] = [] + private var messages: OrderedDictionary = [:] + + func add(_ message: ReceivedMessage) { + updates.append(message) + messages[message.id] = message + } + + func getUpdates() -> [ReceivedMessage] { + updates + } + + func getMessages() -> OrderedDictionary { + messages + } +} + +class TranscriptionTests: LKTestCase, @unchecked Sendable { + // Same segment, same stream + func testUpdates() async throws { + let messageExpectation = expectation(description: "Receives all message updates") + messageExpectation.expectedFulfillmentCount = 3 + + let segmentID = "test-segment" + let topic = "lk.transcription" + + let testChunks = ["Hey", " there!", " What's up?"] + + try await withRooms([ + RoomTestingOptions(canSubscribe: true), + RoomTestingOptions(canPublishData: true), + ]) { rooms in + let receiverRoom = rooms[0] + let senderRoom = rooms[1] + + let receiver = TranscriptionStreamReceiver(room: receiverRoom) + let messageStream = try await receiver.messages() + let streamID = UUID().uuidString + + let messageCollector = MessageCollector() + + let collectionTask = Task { @Sendable in + var iterator = messageStream.makeAsyncIterator() + while let message = await iterator.next() { + await messageCollector.add(message) + messageExpectation.fulfill() + } + } + + for (index, chunk) in testChunks.enumerated() { + let isLast = index == testChunks.count - 1 + + var attributes: [String: String] = [ + "lk.segment_id": segmentID, + "lk.transcription_final": "false", + ] + + if isLast { + attributes["lk.transcription_final"] = "true" + } + + let options = StreamTextOptions( + topic: topic, + attributes: attributes, + id: streamID + ) + + try await senderRoom.localParticipant.sendText(chunk, options: options) + try await Task.sleep(nanoseconds: 10_000_000) + } + + await self.fulfillment(of: [messageExpectation], timeout: 5) + 
collectionTask.cancel() + + let updates = await messageCollector.getUpdates() + XCTAssertEqual(updates.count, 3) + XCTAssertEqual(updates[0].content, .agentTranscript("Hey")) + XCTAssertEqual(updates[1].content, .agentTranscript("Hey there!")) + XCTAssertEqual(updates[2].content, .agentTranscript("Hey there! What's up?")) + + XCTAssertEqual(updates[0].id, segmentID) + XCTAssertEqual(updates[1].id, segmentID) + XCTAssertEqual(updates[2].id, segmentID) + + let firstTimestamp = updates[0].timestamp + XCTAssertEqual(updates[1].timestamp, firstTimestamp) + XCTAssertEqual(updates[2].timestamp, firstTimestamp) + + let messages = await messageCollector.getMessages() + XCTAssertEqual(messages.count, 1) + XCTAssertEqual(messages.keys[0], segmentID) + XCTAssertEqual(messages.values[0].content, .agentTranscript("Hey there! What's up?")) + XCTAssertEqual(messages.values[0].id, segmentID) + XCTAssertEqual(messages.values[0].timestamp, firstTimestamp) + } + } + + // Same segment, different stream + func testReplace() async throws { + let messageExpectation = expectation(description: "Receives all message updates") + messageExpectation.expectedFulfillmentCount = 3 + + let segmentID = "test-segment" + let topic = "lk.transcription" + + let testChunks = ["Hey", "Hey there!", "Hey there! 
What's up?"] + + try await withRooms([ + RoomTestingOptions(canSubscribe: true), + RoomTestingOptions(canPublishData: true), + ]) { rooms in + let receiverRoom = rooms[0] + let senderRoom = rooms[1] + + let receiver = TranscriptionStreamReceiver(room: receiverRoom) + let messageStream = try await receiver.messages() + + let messageCollector = MessageCollector() + + let collectionTask = Task { @Sendable in + var iterator = messageStream.makeAsyncIterator() + while let message = await iterator.next() { + await messageCollector.add(message) + messageExpectation.fulfill() + } + } + + for (index, chunk) in testChunks.enumerated() { + let isLast = index == testChunks.count - 1 + + var attributes: [String: String] = [ + "lk.segment_id": segmentID, + "lk.transcription_final": "false", + ] + + if isLast { + attributes["lk.transcription_final"] = "true" + } + + let options = StreamTextOptions( + topic: topic, + attributes: attributes, + id: UUID().uuidString + ) + + try await senderRoom.localParticipant.sendText(chunk, options: options) + try await Task.sleep(nanoseconds: 10_000_000) + } + + await self.fulfillment(of: [messageExpectation], timeout: 5) + collectionTask.cancel() + + let updates = await messageCollector.getUpdates() + XCTAssertEqual(updates.count, 3) + XCTAssertEqual(updates[0].content, .agentTranscript("Hey")) + XCTAssertEqual(updates[1].content, .agentTranscript("Hey there!")) + XCTAssertEqual(updates[2].content, .agentTranscript("Hey there! What's up?")) + + XCTAssertEqual(updates[0].id, segmentID) + XCTAssertEqual(updates[1].id, segmentID) + XCTAssertEqual(updates[2].id, segmentID) + + let firstTimestamp = updates[0].timestamp + XCTAssertEqual(updates[1].timestamp, firstTimestamp) + XCTAssertEqual(updates[2].timestamp, firstTimestamp) + + let messages = await messageCollector.getMessages() + XCTAssertEqual(messages.count, 1) + XCTAssertEqual(messages.keys[0], segmentID) + XCTAssertEqual(messages.values[0].content, .agentTranscript("Hey there! 
What's up?")) + XCTAssertEqual(messages.values[0].id, segmentID) + XCTAssertEqual(messages.values[0].timestamp, firstTimestamp) + } + } +} From dfa4db6227b822cfac9e065b859d5dc63837e95a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:21:48 +0200 Subject: [PATCH 07/45] Extract tests --- .../Agent/TranscriptionTests.swift | 247 +++++++++--------- 1 file changed, 118 insertions(+), 129 deletions(-) diff --git a/Tests/LiveKitTests/Agent/TranscriptionTests.swift b/Tests/LiveKitTests/Agent/TranscriptionTests.swift index 921caccc9..1090d8532 100644 --- a/Tests/LiveKitTests/Agent/TranscriptionTests.swift +++ b/Tests/LiveKitTests/Agent/TranscriptionTests.swift @@ -37,160 +37,149 @@ actor MessageCollector { } class TranscriptionTests: LKTestCase, @unchecked Sendable { + private var rooms: [Room] = [] + private var receiver: TranscriptionStreamReceiver! + private var senderRoom: Room! + private var messageCollector: MessageCollector! + private var collectionTask: Task! + private var messageExpectation: XCTestExpectation! + // Same segment, same stream func testUpdates() async throws { - let messageExpectation = expectation(description: "Receives all message updates") - messageExpectation.expectedFulfillmentCount = 3 - let segmentID = "test-segment" - let topic = "lk.transcription" - + let streamID = UUID().uuidString let testChunks = ["Hey", " there!", " What's up?"] + let expectedContent = ["Hey", "Hey there!", "Hey there! 
What's up?"] + + try await runTranscriptionTest( + chunks: testChunks, + segmentID: segmentID, + streamID: streamID, + expectedContent: expectedContent + ) + } - try await withRooms([ - RoomTestingOptions(canSubscribe: true), - RoomTestingOptions(canPublishData: true), - ]) { rooms in - let receiverRoom = rooms[0] - let senderRoom = rooms[1] + // Same segment, different stream + func testReplace() async throws { + let segmentID = "test-segment" + let testChunks = ["Hey", "Hey there!", "Hey there! What's up?"] + let expectedContent = ["Hey", "Hey there!", "Hey there! What's up?"] + + try await runTranscriptionTest( + chunks: testChunks, + segmentID: segmentID, + streamID: nil, + expectedContent: expectedContent + ) + } - let receiver = TranscriptionStreamReceiver(room: receiverRoom) - let messageStream = try await receiver.messages() - let streamID = UUID().uuidString + private func setupTestEnvironment(expectedCount: Int) async throws { + messageExpectation = expectation(description: "Receives all message updates") + messageExpectation.expectedFulfillmentCount = expectedCount - let messageCollector = MessageCollector() + receiver = TranscriptionStreamReceiver(room: rooms[0]) + let messageStream = try await receiver.messages() + messageCollector = MessageCollector() + senderRoom = rooms[1] - let collectionTask = Task { @Sendable in - var iterator = messageStream.makeAsyncIterator() - while let message = await iterator.next() { - await messageCollector.add(message) - messageExpectation.fulfill() - } + collectionTask = Task { @Sendable in + var iterator = messageStream.makeAsyncIterator() + while let message = await iterator.next() { + await self.messageCollector.add(message) + self.messageExpectation.fulfill() } + } + } - for (index, chunk) in testChunks.enumerated() { - let isLast = index == testChunks.count - 1 - - var attributes: [String: String] = [ - "lk.segment_id": segmentID, - "lk.transcription_final": "false", - ] + private func sendTranscriptionChunks( + 
chunks: [String], + segmentID: String, + streamID: String? = nil, + to room: Room + ) async throws { + let topic = "lk.transcription" - if isLast { - attributes["lk.transcription_final"] = "true" - } + for (index, chunk) in chunks.enumerated() { + let isLast = index == chunks.count - 1 - let options = StreamTextOptions( - topic: topic, - attributes: attributes, - id: streamID - ) + var attributes: [String: String] = [ + "lk.segment_id": segmentID, + "lk.transcription_final": "false", + ] - try await senderRoom.localParticipant.sendText(chunk, options: options) - try await Task.sleep(nanoseconds: 10_000_000) + if isLast { + attributes["lk.transcription_final"] = "true" } - await self.fulfillment(of: [messageExpectation], timeout: 5) - collectionTask.cancel() - - let updates = await messageCollector.getUpdates() - XCTAssertEqual(updates.count, 3) - XCTAssertEqual(updates[0].content, .agentTranscript("Hey")) - XCTAssertEqual(updates[1].content, .agentTranscript("Hey there!")) - XCTAssertEqual(updates[2].content, .agentTranscript("Hey there! What's up?")) - - XCTAssertEqual(updates[0].id, segmentID) - XCTAssertEqual(updates[1].id, segmentID) - XCTAssertEqual(updates[2].id, segmentID) - - let firstTimestamp = updates[0].timestamp - XCTAssertEqual(updates[1].timestamp, firstTimestamp) - XCTAssertEqual(updates[2].timestamp, firstTimestamp) - - let messages = await messageCollector.getMessages() - XCTAssertEqual(messages.count, 1) - XCTAssertEqual(messages.keys[0], segmentID) - XCTAssertEqual(messages.values[0].content, .agentTranscript("Hey there! What's up?")) - XCTAssertEqual(messages.values[0].id, segmentID) - XCTAssertEqual(messages.values[0].timestamp, firstTimestamp) + let options = StreamTextOptions( + topic: topic, + attributes: attributes, + id: streamID ?? 
UUID().uuidString + ) + + try await room.localParticipant.sendText(chunk, options: options) + try await Task.sleep(nanoseconds: 10_000_000) } } - // Same segment, different stream - func testReplace() async throws { - let messageExpectation = expectation(description: "Receives all message updates") - messageExpectation.expectedFulfillmentCount = 3 + private func validateTranscriptionResults( + updates: [ReceivedMessage], + messages: OrderedDictionary, + segmentID: String, + expectedContent: [String] + ) { + // Validate updates + XCTAssertEqual(updates.count, expectedContent.count) + for (index, expected) in expectedContent.enumerated() { + XCTAssertEqual(updates[index].content, .agentTranscript(expected)) + XCTAssertEqual(updates[index].id, segmentID) + } - let segmentID = "test-segment" - let topic = "lk.transcription" + // Validate timestamps are consistent + let firstTimestamp = updates[0].timestamp + for update in updates { + XCTAssertEqual(update.timestamp, firstTimestamp) + } - let testChunks = ["Hey", "Hey there!", "Hey there! What's up?"] + // Validate final message + XCTAssertEqual(messages.count, 1) + XCTAssertEqual(messages.keys[0], segmentID) + XCTAssertEqual(messages.values[0].content, .agentTranscript(expectedContent.last!)) + XCTAssertEqual(messages.values[0].id, segmentID) + XCTAssertEqual(messages.values[0].timestamp, firstTimestamp) + } + private func runTranscriptionTest( + chunks: [String], + segmentID: String, + streamID: String? 
= nil, + expectedContent: [String] + ) async throws { try await withRooms([ RoomTestingOptions(canSubscribe: true), RoomTestingOptions(canPublishData: true), ]) { rooms in - let receiverRoom = rooms[0] - let senderRoom = rooms[1] - - let receiver = TranscriptionStreamReceiver(room: receiverRoom) - let messageStream = try await receiver.messages() - - let messageCollector = MessageCollector() - - let collectionTask = Task { @Sendable in - var iterator = messageStream.makeAsyncIterator() - while let message = await iterator.next() { - await messageCollector.add(message) - messageExpectation.fulfill() - } - } - - for (index, chunk) in testChunks.enumerated() { - let isLast = index == testChunks.count - 1 - - var attributes: [String: String] = [ - "lk.segment_id": segmentID, - "lk.transcription_final": "false", - ] - - if isLast { - attributes["lk.transcription_final"] = "true" - } - - let options = StreamTextOptions( - topic: topic, - attributes: attributes, - id: UUID().uuidString - ) - - try await senderRoom.localParticipant.sendText(chunk, options: options) - try await Task.sleep(nanoseconds: 10_000_000) - } - - await self.fulfillment(of: [messageExpectation], timeout: 5) - collectionTask.cancel() - - let updates = await messageCollector.getUpdates() - XCTAssertEqual(updates.count, 3) - XCTAssertEqual(updates[0].content, .agentTranscript("Hey")) - XCTAssertEqual(updates[1].content, .agentTranscript("Hey there!")) - XCTAssertEqual(updates[2].content, .agentTranscript("Hey there! 
What's up?")) - - XCTAssertEqual(updates[0].id, segmentID) - XCTAssertEqual(updates[1].id, segmentID) - XCTAssertEqual(updates[2].id, segmentID) - - let firstTimestamp = updates[0].timestamp - XCTAssertEqual(updates[1].timestamp, firstTimestamp) - XCTAssertEqual(updates[2].timestamp, firstTimestamp) - - let messages = await messageCollector.getMessages() - XCTAssertEqual(messages.count, 1) - XCTAssertEqual(messages.keys[0], segmentID) - XCTAssertEqual(messages.values[0].content, .agentTranscript("Hey there! What's up?")) - XCTAssertEqual(messages.values[0].id, segmentID) - XCTAssertEqual(messages.values[0].timestamp, firstTimestamp) + self.rooms = rooms + try await self.setupTestEnvironment(expectedCount: expectedContent.count) + try await self.sendTranscriptionChunks( + chunks: chunks, + segmentID: segmentID, + streamID: streamID, + to: self.senderRoom + ) + + await self.fulfillment(of: [self.messageExpectation], timeout: 5) + self.collectionTask.cancel() + + let updates = await self.messageCollector.getUpdates() + let messages = await self.messageCollector.getMessages() + + self.validateTranscriptionResults( + updates: updates, + messages: messages, + segmentID: segmentID, + expectedContent: expectedContent + ) } } } From 54acf68413f69ab3d557b6c955eeeab78d97fb94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 23 Sep 2025 14:04:39 +0200 Subject: [PATCH 08/45] Renaming --- Sources/LiveKit/Agent/Conversation.swift | 10 +++++----- .../Agent/TranscriptionTests.swift | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) rename Tests/{LiveKitTests => LiveKitCoreTests}/Agent/TranscriptionTests.swift (98%) diff --git a/Sources/LiveKit/Agent/Conversation.swift b/Sources/LiveKit/Agent/Conversation.swift index 4cb2b452f..cac682780 100644 --- a/Sources/LiveKit/Agent/Conversation.swift +++ b/Sources/LiveKit/Agent/Conversation.swift @@ -58,7 +58,7 @@ open class Conversation: ObservableObject { 
public let room: Room - private let credentials: any CredentialsProvider + private let tokenSource: any TokenSource private let senders: [any MessageSender] private let receivers: [any MessageReceiver] @@ -68,8 +68,8 @@ open class Conversation: ObservableObject { // MARK: - Init - public init(credentials: CredentialsProvider, room: Room = .init(), agentName: String? = nil, senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { - self.credentials = credentials + public init(tokenSource: TokenSource, room: Room = .init(), agentName: String? = nil, senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { + self.tokenSource = tokenSource self.room = room let textMessageSender = TextMessageSender(room: room) @@ -155,11 +155,11 @@ open class Conversation: ObservableObject { if preConnectAudio { try await room.withPreConnectAudio(timeout: waitForAgent) { await MainActor.run { self.isListening = true } - try await self.room.connect(credentialsProvider: self.credentials, connectOptions: options, roomOptions: roomOptions) + try await self.room.connect(tokenSource: self.tokenSource, connectOptions: options, roomOptions: roomOptions) await MainActor.run { self.isListening = false } } } else { - try await room.connect(credentialsProvider: credentials, connectOptions: options, roomOptions: roomOptions) + try await room.connect(tokenSource: tokenSource, connectOptions: options, roomOptions: roomOptions) } } catch { self.error = .failedToConnect(error) diff --git a/Tests/LiveKitTests/Agent/TranscriptionTests.swift b/Tests/LiveKitCoreTests/Agent/TranscriptionTests.swift similarity index 98% rename from Tests/LiveKitTests/Agent/TranscriptionTests.swift rename to Tests/LiveKitCoreTests/Agent/TranscriptionTests.swift index 1090d8532..33713d152 100644 --- a/Tests/LiveKitTests/Agent/TranscriptionTests.swift +++ b/Tests/LiveKitCoreTests/Agent/TranscriptionTests.swift @@ -16,7 +16,9 @@ @testable import LiveKit import OrderedCollections 
-import XCTest +#if canImport(LiveKitTestSupport) +import LiveKitTestSupport +#endif actor MessageCollector { private var updates: [ReceivedMessage] = [] From bec88b69dd23568507dfc97e0f4defc756a07535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:22:07 +0200 Subject: [PATCH 09/45] Pass token sources --- Sources/LiveKit/Agent/Conversation.swift | 51 ++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/Sources/LiveKit/Agent/Conversation.swift b/Sources/LiveKit/Agent/Conversation.swift index cac682780..63dc59415 100644 --- a/Sources/LiveKit/Agent/Conversation.swift +++ b/Sources/LiveKit/Agent/Conversation.swift @@ -58,7 +58,14 @@ open class Conversation: ObservableObject { public let room: Room - private let tokenSource: any TokenSource + private enum AnyTokenSource { + case fixed(any TokenSourceFixed) + case configurable(any TokenSourceConfigurable) + } + + private let tokenSource: AnyTokenSource + private let agentName: String? + private let senders: [any MessageSender] private let receivers: [any MessageReceiver] @@ -68,8 +75,14 @@ open class Conversation: ObservableObject { // MARK: - Init - public init(tokenSource: TokenSource, room: Room = .init(), agentName: String? = nil, senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { + private init(tokenSource: AnyTokenSource, + agentName: String?, + room: Room, + senders: [any MessageSender]?, + receivers: [any MessageReceiver]?) + { self.tokenSource = tokenSource + self.agentName = agentName self.room = room let textMessageSender = TextMessageSender(room: room) @@ -83,6 +96,23 @@ open class Conversation: ObservableObject { observe(receivers: receivers) } + public convenience init(tokenSource: some TokenSourceFixed, + room: Room = .init(), + senders: [any MessageSender]? = nil, + receivers: [any MessageReceiver]? 
= nil) + { + self.init(tokenSource: .fixed(tokenSource), agentName: nil, room: room, senders: senders, receivers: receivers) + } + + public convenience init(tokenSource: some TokenSourceConfigurable, + room: Room = .init(), + agentName: String? = nil, + senders: [any MessageSender]? = nil, + receivers: [any MessageReceiver]? = nil) + { + self.init(tokenSource: .configurable(tokenSource), agentName: agentName, room: room, senders: senders, receivers: receivers) + } + private func observe(room: Room, agentName _: String?) { Task { [weak self] in for try await _ in room.changes { @@ -152,14 +182,27 @@ open class Conversation: ObservableObject { } do { + let response: TokenSourceResponse = switch tokenSource { + case let .fixed(s): + try await s.fetch() + case let .configurable(s): + try await s.fetch(TokenRequestOptions(agentName: agentName)) + } + if preConnectAudio { try await room.withPreConnectAudio(timeout: waitForAgent) { await MainActor.run { self.isListening = true } - try await self.room.connect(tokenSource: self.tokenSource, connectOptions: options, roomOptions: roomOptions) + try await self.room.connect(url: response.serverURL.absoluteString, + token: response.participantToken, + connectOptions: options, + roomOptions: roomOptions) await MainActor.run { self.isListening = false } } } else { - try await room.connect(tokenSource: tokenSource, connectOptions: options, roomOptions: roomOptions) + try await room.connect(url: response.serverURL.absoluteString, + token: response.participantToken, + connectOptions: options, + roomOptions: roomOptions) } } catch { self.error = .failedToConnect(error) From 33b02f9eb750d20725f85bfc75ac2532bb08f36e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:37:36 +0200 Subject: [PATCH 10/45] Renaming --- Sources/LiveKit/Agent/LocalMedia.swift | 4 ++-- ...vironment.swift => Session+Environment.swift} | 16 ++++++++-------- 
.../Agent/{Conversation.swift => Session.swift} | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) rename Sources/LiveKit/Agent/{Conversation+Environment.swift => Session+Environment.swift} (80%) rename Sources/LiveKit/Agent/{Conversation.swift => Session.swift} (99%) diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift index e0d7775ab..a8b721303 100644 --- a/Sources/LiveKit/Agent/LocalMedia.swift +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -61,8 +61,8 @@ open class LocalMedia: ObservableObject { self.init(localParticipant: room.localParticipant) } - public convenience init(conversation: Conversation) { - self.init(room: conversation.room) + public convenience init(session: Session) { + self.init(room: session.room) } private func observe(_ localParticipant: LocalParticipant) { diff --git a/Sources/LiveKit/Agent/Conversation+Environment.swift b/Sources/LiveKit/Agent/Session+Environment.swift similarity index 80% rename from Sources/LiveKit/Agent/Conversation+Environment.swift rename to Sources/LiveKit/Agent/Session+Environment.swift index d0f64cde7..bdf6563c8 100644 --- a/Sources/LiveKit/Agent/Conversation+Environment.swift +++ b/Sources/LiveKit/Agent/Session+Environment.swift @@ -35,13 +35,13 @@ public extension EnvironmentValues { @MainActor @propertyWrapper -public struct LiveKitConversation: DynamicProperty { - @EnvironmentObject private var conversation: Conversation +public struct LiveKitSession: DynamicProperty { + @EnvironmentObject private var session: Session public init() {} - public var wrappedValue: Conversation { - conversation + public var wrappedValue: Session { + session } } @@ -60,7 +60,7 @@ public struct LiveKitLocalMedia: DynamicProperty { @MainActor @propertyWrapper public struct LiveKitAgent: DynamicProperty { - @EnvironmentObject private var conversation: Conversation + @EnvironmentObject private var session: Session @Environment(\.agentName) private var environmentName let agentName: String? 
@@ -71,10 +71,10 @@ public struct LiveKitAgent: DynamicProperty { public var wrappedValue: Agent? { if let agentName { - return conversation.agent(named: agentName) + return session.agent(named: agentName) } else if let environmentName { - return conversation.agent(named: environmentName) + return session.agent(named: environmentName) } - return conversation.agents.values.first + return session.agents.values.first } } diff --git a/Sources/LiveKit/Agent/Conversation.swift b/Sources/LiveKit/Agent/Session.swift similarity index 99% rename from Sources/LiveKit/Agent/Conversation.swift rename to Sources/LiveKit/Agent/Session.swift index 63dc59415..de5fc74b8 100644 --- a/Sources/LiveKit/Agent/Conversation.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -18,7 +18,7 @@ import Foundation import OrderedCollections @MainActor -open class Conversation: ObservableObject { +open class Session: ObservableObject { // MARK: - Error public enum Error: LocalizedError { From d141d6abbf364e786ea2f13bdf8c4145463d4be2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 11:27:36 +0200 Subject: [PATCH 11/45] Extract Options --- Sources/LiveKit/Agent/Session+Options.swift | 44 +++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Sources/LiveKit/Agent/Session+Options.swift diff --git a/Sources/LiveKit/Agent/Session+Options.swift b/Sources/LiveKit/Agent/Session+Options.swift new file mode 100644 index 000000000..339c59751 --- /dev/null +++ b/Sources/LiveKit/Agent/Session+Options.swift @@ -0,0 +1,44 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +public extension Session { + struct Options { + public var tokenRequestOptions: TokenRequestOptions + public var connectOptions: ConnectOptions + public var roomOptions: RoomOptions + public var room: Room + public var preConnectAudio: Bool + public var agentConnectTimeout: TimeInterval + + public init( + tokenRequestOptions: TokenRequestOptions = .init(), + connectOptions: ConnectOptions = .init(), + roomOptions: RoomOptions = .init(), + room: Room = .init(), + preConnectAudio: Bool = true, + agentConnectTimeout: TimeInterval = 20 + ) { + self.tokenRequestOptions = tokenRequestOptions + self.connectOptions = connectOptions + self.roomOptions = roomOptions + self.room = room + self.preConnectAudio = preConnectAudio + self.agentConnectTimeout = agentConnectTimeout + } + } +} From 0e49e6fcdae46fb5c88776e9e2e9c806cf96e36c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:03:27 +0200 Subject: [PATCH 12/45] Split options --- Sources/LiveKit/Agent/Session+Options.swift | 11 +-- Sources/LiveKit/Agent/Session.swift | 104 ++++++++++++-------- 2 files changed, 64 insertions(+), 51 deletions(-) diff --git a/Sources/LiveKit/Agent/Session+Options.swift b/Sources/LiveKit/Agent/Session+Options.swift index 339c59751..c0d8502ad 100644 --- a/Sources/LiveKit/Agent/Session+Options.swift +++ b/Sources/LiveKit/Agent/Session+Options.swift @@ -17,25 +17,16 @@ import Foundation public extension Session { - struct Options { - public var 
tokenRequestOptions: TokenRequestOptions - public var connectOptions: ConnectOptions - public var roomOptions: RoomOptions + struct Options: Sendable { public var room: Room public var preConnectAudio: Bool public var agentConnectTimeout: TimeInterval public init( - tokenRequestOptions: TokenRequestOptions = .init(), - connectOptions: ConnectOptions = .init(), - roomOptions: RoomOptions = .init(), room: Room = .init(), preConnectAudio: Bool = true, agentConnectTimeout: TimeInterval = 20 ) { - self.tokenRequestOptions = tokenRequestOptions - self.connectOptions = connectOptions - self.roomOptions = roomOptions self.room = room self.preConnectAudio = preConnectAudio self.agentConnectTimeout = agentConnectTimeout diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index de5fc74b8..a5dabdf6a 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -58,13 +58,13 @@ open class Session: ObservableObject { public let room: Room - private enum AnyTokenSource { + private enum TokenSourceConfiguration { case fixed(any TokenSourceFixed) - case configurable(any TokenSourceConfigurable) + case configurable(any TokenSourceConfigurable, TokenRequestOptions) } - private let tokenSource: AnyTokenSource - private let agentName: String? + private let tokenSourceConfiguration: TokenSourceConfiguration + private var options: Options private let senders: [any MessageSender] private let receivers: [any MessageReceiver] @@ -75,45 +75,64 @@ open class Session: ObservableObject { // MARK: - Init - private init(tokenSource: AnyTokenSource, - agentName: String?, - room: Room, + private init(tokenSourceConfiguration: TokenSourceConfiguration, + options: Options, senders: [any MessageSender]?, receivers: [any MessageReceiver]?) 
{ - self.tokenSource = tokenSource - self.agentName = agentName - self.room = room + self.tokenSourceConfiguration = tokenSourceConfiguration + self.options = options + room = options.room let textMessageSender = TextMessageSender(room: room) - let senders = senders ?? [textMessageSender] - let receivers = receivers ?? [textMessageSender, TranscriptionStreamReceiver(room: room)] + let resolvedSenders = senders ?? [textMessageSender] + let resolvedReceivers = receivers ?? [textMessageSender, TranscriptionStreamReceiver(room: room)] - self.senders = senders - self.receivers = receivers + self.senders = resolvedSenders + self.receivers = resolvedReceivers - observe(room: room, agentName: agentName) - observe(receivers: receivers) + observe(room: room) + observe(receivers: resolvedReceivers) } - public convenience init(tokenSource: some TokenSourceFixed, - room: Room = .init(), + public convenience init(tokenSource: any TokenSourceFixed, + options: Options = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { - self.init(tokenSource: .fixed(tokenSource), agentName: nil, room: room, senders: senders, receivers: receivers) + self.init(tokenSourceConfiguration: .fixed(tokenSource), + options: options, + senders: senders, + receivers: receivers) } - public convenience init(tokenSource: some TokenSourceConfigurable, - room: Room = .init(), - agentName: String? = nil, + public convenience init(tokenSource: any TokenSourceConfigurable, + tokenOptions: TokenRequestOptions = .init(), + options: Options = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { - self.init(tokenSource: .configurable(tokenSource), agentName: agentName, room: room, senders: senders, receivers: receivers) + self.init(tokenSourceConfiguration: .configurable(tokenSource, tokenOptions), + options: options, + senders: senders, + receivers: receivers) } - private func observe(room: Room, agentName _: String?) 
{ + public convenience init(agentName: String, + agentMetadata: String? = nil, + tokenSource: any TokenSourceConfigurable, + options: Options = .init(), + senders: [any MessageSender]? = nil, + receivers: [any MessageReceiver]? = nil) + { + self.init(tokenSource: tokenSource, + tokenOptions: .init(agentName: agentName, agentMetadata: agentMetadata), + options: options, + senders: senders, + receivers: receivers) + } + + private func observe(room: Room) { Task { [weak self] in for try await _ in room.changes { guard let self else { return } @@ -164,45 +183,37 @@ open class Session: ObservableObject { // MARK: - Lifecycle - public func start(preConnectAudio: Bool = true, waitForAgent: TimeInterval = 20, options: ConnectOptions? = nil, roomOptions: RoomOptions? = nil) async { + public func start() async { guard connectionState == .disconnected else { return } error = nil waitForAgentTask?.cancel() + let timeout = options.agentConnectTimeout + defer { waitForAgentTask = Task { - try await Task.sleep(nanoseconds: UInt64(TimeInterval(NSEC_PER_SEC) * waitForAgent)) + try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) try Task.checkCancellation() if connectionState == .connected, agents.isEmpty { - await end() self.error = .agentNotConnected } } } do { - let response: TokenSourceResponse = switch tokenSource { - case let .fixed(s): - try await s.fetch() - case let .configurable(s): - try await s.fetch(TokenRequestOptions(agentName: agentName)) - } + let response = try await fetchToken() - if preConnectAudio { - try await room.withPreConnectAudio(timeout: waitForAgent) { + if options.preConnectAudio { + try await room.withPreConnectAudio(timeout: timeout) { await MainActor.run { self.isListening = true } try await self.room.connect(url: response.serverURL.absoluteString, - token: response.participantToken, - connectOptions: options, - roomOptions: roomOptions) + token: response.participantToken) await MainActor.run { self.isListening = false } } } 
else { try await room.connect(url: response.serverURL.absoluteString, - token: response.participantToken, - connectOptions: options, - roomOptions: roomOptions) + token: response.participantToken) } } catch { self.error = .failedToConnect(error) @@ -239,4 +250,15 @@ open class Session: ObservableObject { public func restoreMessageHistory(_ messages: [ReceivedMessage]) { self.messages = .init(uniqueKeysWithValues: messages.sorted(by: { $0.timestamp < $1.timestamp }).map { ($0.id, $0) }) } + + // MARK: - Helpers + + private func fetchToken() async throws -> TokenSourceResponse { + switch tokenSourceConfiguration { + case let .fixed(source): + try await source.fetch() + case let .configurable(source, options): + try await source.fetch(options) + } + } } From 7b954da94c84686e83474da602c273fdedcace99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:14:38 +0200 Subject: [PATCH 13/45] Nest --- Sources/LiveKit/Agent/Session+Options.swift | 26 ++++++++++----------- Sources/LiveKit/Agent/Session.swift | 10 ++++---- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/Sources/LiveKit/Agent/Session+Options.swift b/Sources/LiveKit/Agent/Session+Options.swift index c0d8502ad..1c86aa61c 100644 --- a/Sources/LiveKit/Agent/Session+Options.swift +++ b/Sources/LiveKit/Agent/Session+Options.swift @@ -16,20 +16,18 @@ import Foundation -public extension Session { - struct Options: Sendable { - public var room: Room - public var preConnectAudio: Bool - public var agentConnectTimeout: TimeInterval +public struct SessionOptions: Sendable { + public var room: Room + public var preConnectAudio: Bool + public var agentConnectTimeout: TimeInterval - public init( - room: Room = .init(), - preConnectAudio: Bool = true, - agentConnectTimeout: TimeInterval = 20 - ) { - self.room = room - self.preConnectAudio = preConnectAudio - self.agentConnectTimeout = agentConnectTimeout - } + public init( + 
room: Room = .init(), + preConnectAudio: Bool = true, + agentConnectTimeout: TimeInterval = 20 + ) { + self.room = room + self.preConnectAudio = preConnectAudio + self.agentConnectTimeout = agentConnectTimeout } } diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index a5dabdf6a..90959b8e3 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -64,7 +64,7 @@ open class Session: ObservableObject { } private let tokenSourceConfiguration: TokenSourceConfiguration - private var options: Options + private var options: SessionOptions private let senders: [any MessageSender] private let receivers: [any MessageReceiver] @@ -76,7 +76,7 @@ open class Session: ObservableObject { // MARK: - Init private init(tokenSourceConfiguration: TokenSourceConfiguration, - options: Options, + options: SessionOptions, senders: [any MessageSender]?, receivers: [any MessageReceiver]?) { @@ -96,7 +96,7 @@ open class Session: ObservableObject { } public convenience init(tokenSource: any TokenSourceFixed, - options: Options = .init(), + options: SessionOptions = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { @@ -108,7 +108,7 @@ open class Session: ObservableObject { public convenience init(tokenSource: any TokenSourceConfigurable, tokenOptions: TokenRequestOptions = .init(), - options: Options = .init(), + options: SessionOptions = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) { @@ -121,7 +121,7 @@ open class Session: ObservableObject { public convenience init(agentName: String, agentMetadata: String? = nil, tokenSource: any TokenSourceConfigurable, - options: Options = .init(), + options: SessionOptions = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? 
= nil) { From 06609c1f71f92978f447972c2e9550b2af97789a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:23:42 +0200 Subject: [PATCH 14/45] Weak --- Sources/LiveKit/Agent/Session.swift | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 90959b8e3..8f6c3737a 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -132,6 +132,10 @@ open class Session: ObservableObject { receivers: receivers) } + deinit { + waitForAgentTask?.cancel() + } + private func observe(room: Room) { Task { [weak self] in for try await _ in room.changes { @@ -192,9 +196,10 @@ open class Session: ObservableObject { let timeout = options.agentConnectTimeout defer { - waitForAgentTask = Task { + waitForAgentTask = Task { [weak self] in try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) try Task.checkCancellation() + guard let self else { return } if connectionState == .connected, agents.isEmpty { self.error = .agentNotConnected } From ac90eb4fcb91b87c5e9abd48286193e43dbf1a95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:25:53 +0200 Subject: [PATCH 15/45] Fix existential --- Sources/LiveKit/Track/VideoTrack.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/LiveKit/Track/VideoTrack.swift b/Sources/LiveKit/Track/VideoTrack.swift index f35e3c50d..6598567cf 100644 --- a/Sources/LiveKit/Track/VideoTrack.swift +++ b/Sources/LiveKit/Track/VideoTrack.swift @@ -72,7 +72,7 @@ extension VideoTrackProtocol where Self: Track { } } -public extension VideoTrack { +public extension Track { /// The aspect ratio of the video track or 1 if the dimensions are not available. 
var aspectRatio: CGFloat { guard let dimensions else { return 1 } From d84ef9bf2eb64a9e89c59cba7711b06652d41808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:48:27 +0200 Subject: [PATCH 16/45] Errors --- Sources/LiveKit/Agent/Agent.swift | 1 + Sources/LiveKit/Agent/LocalMedia.swift | 9 +++++++++ Sources/LiveKit/Agent/Session.swift | 10 +++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index f2a23bf0b..a2708a4d8 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -14,6 +14,7 @@ * limitations under the License. */ +import Combine import Foundation @MainActor diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift index a8b721303..0e4a001ae 100644 --- a/Sources/LiveKit/Agent/LocalMedia.swift +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -15,6 +15,8 @@ */ @preconcurrency import AVFoundation +import Combine +import Foundation @MainActor open class LocalMedia: ObservableObject { @@ -22,6 +24,13 @@ open class LocalMedia: ObservableObject { public enum Error: LocalizedError { case mediaDevice(Swift.Error) + + public var errorDescription: String? { + switch self { + case let .mediaDevice(error): + "Media device error: \(error.localizedDescription)" + } + } } // MARK: - Devices diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 8f6c3737a..a1d9e3b67 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -14,6 +14,7 @@ * limitations under the License. */ +import Combine import Foundation import OrderedCollections @@ -27,7 +28,14 @@ open class Session: ObservableObject { case failedToSend(Swift.Error) public var errorDescription: String? 
{ - "TODO" + switch self { + case .agentNotConnected: + "Agent not connected" + case let .failedToConnect(error): + "Failed to connect: \(error.localizedDescription)" + case let .failedToSend(error): + "Failed to send: \(error.localizedDescription)" + } } } From 4fcd6510f7054e8894cef7a3e08acbceccf79290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:53:56 +0200 Subject: [PATCH 17/45] Sendable --- .../LiveKit/Support/ObservableObject+.swift | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Sources/LiveKit/Support/ObservableObject+.swift b/Sources/LiveKit/Support/ObservableObject+.swift index 68a3d7a18..3da521406 100644 --- a/Sources/LiveKit/Support/ObservableObject+.swift +++ b/Sources/LiveKit/Support/ObservableObject+.swift @@ -18,19 +18,13 @@ extension ObservableObject { /// An async sequence that emits the `objectWillChange` events. - var changes: any AsyncSequence { - if #available(macOS 12.0, iOS 15.0, tvOS 15.0, *) { - // This is necessary due to ObservableObjectPublisher not respecting the demand. 
- // See: https://forums.swift.org/t/asyncpublisher-causes-crash-in-rather-simple-situation - objectWillChange.buffer(size: 1, prefetch: .byRequest, whenFull: .dropOldest).values - } else { - AsyncStream { continuation in - let cancellable = objectWillChange.sink { _ in - continuation.yield() - } - continuation.onTermination = { _ in - cancellable.cancel() - } + var changes: AsyncStream { + AsyncStream { continuation in + let cancellable = objectWillChange.sink { _ in + continuation.yield() + } + continuation.onTermination = { _ in + cancellable.cancel() } } } From d5e64379e096becce8598d5733f02276f948ab4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:43:28 +0200 Subject: [PATCH 18/45] Older Swift --- .../Chat/Receive/TranscriptionStreamReceiver.swift | 11 ++++++----- Sources/LiveKit/Agent/LocalMedia.swift | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift index 758ce5043..bec7674cc 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionStreamReceiver.swift @@ -89,17 +89,18 @@ actor TranscriptionStreamReceiver: MessageReceiver, Loggable { func messages() async throws -> AsyncStream { let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) + let topic = topic + try await room.registerTextStreamHandler(for: topic) { [weak self] reader, participantIdentity in - guard let self else { return } for try await message in reader where !message.isEmpty { + guard let self else { return } await continuation.yield(processIncoming(partialMessage: message, reader: reader, participantIdentity: participantIdentity)) } } - continuation.onTermination = { [weak self] _ in - Task { - guard let self else { return } - await 
self.room.unregisterTextStreamHandler(for: self.topic) + continuation.onTermination = { _ in + Task { [weak self] in + await self?.room.unregisterTextStreamHandler(for: topic) } } diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/Agent/LocalMedia.swift index 0e4a001ae..fe2c09ed3 100644 --- a/Sources/LiveKit/Agent/LocalMedia.swift +++ b/Sources/LiveKit/Agent/LocalMedia.swift @@ -96,8 +96,8 @@ open class LocalMedia: ObservableObject { try await AudioManager.shared.setRecordingAlwaysPreparedMode(true) } - AudioManager.shared.onDeviceUpdate = { [weak self] _ in - Task { @MainActor in + AudioManager.shared.onDeviceUpdate = { _ in + Task { @MainActor [weak self] in self?.audioDevices = AudioManager.shared.inputDevices self?.selectedAudioDeviceID = AudioManager.shared.defaultInputDevice.deviceId } From 9cf68ff6fdb3d697e675e94301a300593f7c0212 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 17 Oct 2025 15:10:52 +0200 Subject: [PATCH 19/45] CR: Session.withAgent factory --- Sources/LiveKit/Agent/Session.swift | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index a1d9e3b67..1744e891e 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -126,18 +126,18 @@ open class Session: ObservableObject { receivers: receivers) } - public convenience init(agentName: String, - agentMetadata: String? = nil, - tokenSource: any TokenSourceConfigurable, - options: SessionOptions = .init(), - senders: [any MessageSender]? = nil, - receivers: [any MessageReceiver]? = nil) + public static func withAgent(_ agentName: String, + agentMetadata: String? = nil, + tokenSource: any TokenSourceConfigurable, + options: SessionOptions = .init(), + senders: [any MessageSender]? = nil, + receivers: [any MessageReceiver]? 
= nil) -> Session { - self.init(tokenSource: tokenSource, - tokenOptions: .init(agentName: agentName, agentMetadata: agentMetadata), - options: options, - senders: senders, - receivers: receivers) + Session(tokenSource: tokenSource, + tokenOptions: .init(agentName: agentName, agentMetadata: agentMetadata), + options: options, + senders: senders, + receivers: receivers) } deinit { From ae38145c2b952cd0d98f45256f7f97b1f1555384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 21 Oct 2025 12:20:50 +0200 Subject: [PATCH 20/45] CR: Don't expose multiple agents --- Sources/LiveKit/Agent/Agent.swift | 2 + .../LiveKit/Agent/Session+Environment.swift | 80 ------------------- Sources/LiveKit/Agent/Session.swift | 13 +-- ...ion+Options.swift => SessionOptions.swift} | 0 4 files changed, 9 insertions(+), 86 deletions(-) delete mode 100644 Sources/LiveKit/Agent/Session+Environment.swift rename Sources/LiveKit/Agent/{Session+Options.swift => SessionOptions.swift} (100%) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index a2708a4d8..2e2c1c081 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -19,6 +19,8 @@ import Foundation @MainActor open class Agent: ObservableObject { + public typealias Identity = Participant.Identity + @Published public private(set) var state: AgentState = .idle @Published public private(set) var audioTrack: (any AudioTrack)? diff --git a/Sources/LiveKit/Agent/Session+Environment.swift b/Sources/LiveKit/Agent/Session+Environment.swift deleted file mode 100644 index bdf6563c8..000000000 --- a/Sources/LiveKit/Agent/Session+Environment.swift +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2025 LiveKit - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import SwiftUI - -#if swift(>=6.0) -public extension EnvironmentValues { - @Entry var agentName: String? = nil -} -#else -public struct AgentNameKey: EnvironmentKey { - public static let defaultValue: String? = nil -} - -public extension EnvironmentValues { - var agentName: String? { - get { self[AgentNameKey.self] } - set { self[AgentNameKey.self] = newValue } - } -} -#endif - -@MainActor -@propertyWrapper -public struct LiveKitSession: DynamicProperty { - @EnvironmentObject private var session: Session - - public init() {} - - public var wrappedValue: Session { - session - } -} - -@MainActor -@propertyWrapper -public struct LiveKitLocalMedia: DynamicProperty { - @EnvironmentObject private var localMedia: LocalMedia - - public init() {} - - public var wrappedValue: LocalMedia { - localMedia - } -} - -@MainActor -@propertyWrapper -public struct LiveKitAgent: DynamicProperty { - @EnvironmentObject private var session: Session - @Environment(\.agentName) private var environmentName - - let agentName: String? - - public init(_ agentName: String? = nil) { - self.agentName = agentName - } - - public var wrappedValue: Agent? 
{ - if let agentName { - return session.agent(named: agentName) - } else if let environmentName { - return session.agent(named: environmentName) - } - return session.agents.values.first - } -} diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 1744e891e..513926428 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -57,8 +57,9 @@ open class Session: ObservableObject { } } - @Published public private(set) var agents: [Participant.Identity: Agent] = [:] - public var hasAgents: Bool { !agents.isEmpty } + @Published private var agents: [Agent.Identity: Agent] = [:] + public var agent: Agent? { agents.values.first } + public var hasAgent: Bool { !agents.isEmpty } @Published public private(set) var messages: OrderedDictionary = [:] @@ -185,12 +186,12 @@ open class Session: ObservableObject { // MARK: - Agents - public func agent(named name: String) -> Agent? { - agents.values.first { $0.participant.attributes["lk.agent_name"] == name || $0.participant.identity?.stringValue == name } + private func agent(named agentName: String) -> Agent? { + agents.values.first { $0.participant.attributes["lk.agent_name"] == agentName } } - public subscript(name: String) -> Agent? { - agent(named: name) + private subscript(agentName: String) -> Agent? 
{ + agent(named: agentName) } // MARK: - Lifecycle diff --git a/Sources/LiveKit/Agent/Session+Options.swift b/Sources/LiveKit/Agent/SessionOptions.swift similarity index 100% rename from Sources/LiveKit/Agent/Session+Options.swift rename to Sources/LiveKit/Agent/SessionOptions.swift From 23277455c1bb6b6c4d78019a8559ccdc1b59a70c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 21 Oct 2025 13:05:02 +0200 Subject: [PATCH 21/45] Naming --- Sources/LiveKit/Agent/Session.swift | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 513926428..fdfc6f00d 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -20,6 +20,8 @@ import OrderedCollections @MainActor open class Session: ObservableObject { + private static let agentNameAttribute = "lk.agent_name" + // MARK: - Error public enum Error: LocalizedError { @@ -44,11 +46,11 @@ open class Session: ObservableObject { @Published public private(set) var error: Error? @Published public private(set) var connectionState: ConnectionState = .disconnected - @Published public private(set) var isListening = false + @Published public private(set) var bufferingSpeechLocally = false public var isReady: Bool { switch connectionState { - case .disconnected where isListening, - .connecting where isListening, + case .disconnected where bufferingSpeechLocally, + .connecting where bufferingSpeechLocally, .connected, .reconnecting: true @@ -61,7 +63,8 @@ open class Session: ObservableObject { public var agent: Agent? 
{ agents.values.first } public var hasAgent: Bool { !agents.isEmpty } - @Published public private(set) var messages: OrderedDictionary = [:] + @Published private var messagesDict: OrderedDictionary = [:] + public var messages: [ReceivedMessage] { messagesDict.values.elements } // MARK: - Dependencies @@ -178,7 +181,7 @@ open class Session: ObservableObject { Task { [weak self] in for await message in try await receiver.messages() { guard let self else { return } - messages.updateValue(message, forKey: message.id) + messagesDict.updateValue(message, forKey: message.id) } } } @@ -187,7 +190,7 @@ open class Session: ObservableObject { // MARK: - Agents private func agent(named agentName: String) -> Agent? { - agents.values.first { $0.participant.attributes["lk.agent_name"] == agentName } + agents.values.first { $0.participant.attributes[Self.agentNameAttribute] == agentName } } private subscript(agentName: String) -> Agent? { @@ -220,10 +223,10 @@ open class Session: ObservableObject { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { - await MainActor.run { self.isListening = true } + await MainActor.run { self.bufferingSpeechLocally = true } try await self.room.connect(url: response.serverURL.absoluteString, token: response.participantToken) - await MainActor.run { self.isListening = false } + await MainActor.run { self.bufferingSpeechLocally = false } } } else { try await room.connect(url: response.serverURL.absoluteString, @@ -258,11 +261,11 @@ open class Session: ObservableObject { } public func getMessageHistory() -> [ReceivedMessage] { - messages.values.elements + messages } public func restoreMessageHistory(_ messages: [ReceivedMessage]) { - self.messages = .init(uniqueKeysWithValues: messages.sorted(by: { $0.timestamp < $1.timestamp }).map { ($0.id, $0) }) + messagesDict = .init(uniqueKeysWithValues: messages.sorted(by: { $0.timestamp < $1.timestamp }).map { ($0.id, $0) }) } // MARK: - Helpers From 
ddf20a54099709e5f3e32233fae9a9879dfbe8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Tue, 21 Oct 2025 15:01:08 +0200 Subject: [PATCH 22/45] Use ordered dict --- Sources/LiveKit/Agent/Session.swift | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index fdfc6f00d..460133869 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -59,9 +59,9 @@ open class Session: ObservableObject { } } - @Published private var agents: [Agent.Identity: Agent] = [:] - public var agent: Agent? { agents.values.first } - public var hasAgent: Bool { !agents.isEmpty } + @Published private var agentsDict: OrderedDictionary = [:] + public var agent: Agent? { agentsDict.values.first } + public var hasAgent: Bool { !agentsDict.isEmpty } @Published private var messagesDict: OrderedDictionary = [:] public var messages: [ReceivedMessage] { messagesDict.values.elements } @@ -162,10 +162,10 @@ open class Session: ObservableObject { private func updateAgents(in room: Room) { let agentParticipants = room.agentParticipants - var newAgents: [Participant.Identity: Agent] = [:] + var newAgents: OrderedDictionary = [:] for (identity, participant) in agentParticipants { - if let existingAgent = agents[identity] { + if let existingAgent = agentsDict[identity] { newAgents[identity] = existingAgent } else { let newAgent = Agent(participant: participant) @@ -173,7 +173,7 @@ open class Session: ObservableObject { } } - agents = newAgents + agentsDict = newAgents } private func observe(receivers: [any MessageReceiver]) { @@ -190,7 +190,7 @@ open class Session: ObservableObject { // MARK: - Agents private func agent(named agentName: String) -> Agent? 
{ - agents.values.first { $0.participant.attributes[Self.agentNameAttribute] == agentName } + agentsDict.values.first { $0.participant.attributes[Self.agentNameAttribute] == agentName } } private subscript(agentName: String) -> Agent? { @@ -212,7 +212,7 @@ open class Session: ObservableObject { try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) try Task.checkCancellation() guard let self else { return } - if connectionState == .connected, agents.isEmpty { + if connectionState == .connected, !hasAgent { self.error = .agentNotConnected } } From a93fcf0199d2b88e7b44c0d7f5629ce6ea14c171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Wed, 22 Oct 2025 13:34:08 +0200 Subject: [PATCH 23/45] Alt design: Agent struct/enum --- Sources/LiveKit/Agent/Agent.swift | 55 +++++++++++++++++------------ Sources/LiveKit/Agent/Session.swift | 48 +++++++------------------ 2 files changed, 45 insertions(+), 58 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 2e2c1c081..860607aad 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -14,39 +14,50 @@ * limitations under the License. */ -import Combine import Foundation -@MainActor -open class Agent: ObservableObject { - public typealias Identity = Participant.Identity +public enum Agent { + public enum Error: LocalizedError { + case timeout - @Published public private(set) var state: AgentState = .idle - - @Published public private(set) var audioTrack: (any AudioTrack)? - @Published public private(set) var avatarVideoTrack: (any VideoTrack)? + public var errorDescription: String? { + switch self { + case .timeout: + "Agent not connected" + } + } + } - public let participant: Participant + case disconnected + case connecting + case connected(AgentState, (any AudioTrack)?, (any VideoTrack)?) 
+ case failed(Error) - public init(participant: Participant) { - self.participant = participant - observe(participant) + public var isConnected: Bool { + switch self { + case .connected: true + default: false + } } - private func observe(_ participant: Participant) { - Task { [weak self] in - for try await _ in participant.changes { - guard let self else { return } + public var audioTrack: (any AudioTrack)? { + switch self { + case let .connected(_, audioTrack, _): audioTrack + default: nil + } + } - state = participant.agentState - updateTracks(of: participant) - } + public var avatarVideoTrack: (any VideoTrack)? { + switch self { + case let .connected(_, _, avatarVideoTrack): avatarVideoTrack + default: nil } } - private func updateTracks(of participant: Participant) { - audioTrack = participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack - avatarVideoTrack = participant.avatarWorker?.firstCameraVideoTrack + static func connected(participant: Participant) -> Agent { + .connected(participant.agentState, + participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack, + participant.avatarWorker?.firstCameraVideoTrack) } } diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 460133869..913572e22 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -25,14 +25,11 @@ open class Session: ObservableObject { // MARK: - Error public enum Error: LocalizedError { - case agentNotConnected case failedToConnect(Swift.Error) case failedToSend(Swift.Error) public var errorDescription: String? { switch self { - case .agentNotConnected: - "Agent not connected" case let .failedToConnect(error): "Failed to connect: \(error.localizedDescription)" case let .failedToSend(error): @@ -46,22 +43,16 @@ open class Session: ObservableObject { @Published public private(set) var error: Error? 
@Published public private(set) var connectionState: ConnectionState = .disconnected - @Published public private(set) var bufferingSpeechLocally = false - public var isReady: Bool { + public var isConnected: Bool { switch connectionState { - case .disconnected where bufferingSpeechLocally, - .connecting where bufferingSpeechLocally, - .connected, - .reconnecting: + case .connecting, .connected: true default: false } } - @Published private var agentsDict: OrderedDictionary = [:] - public var agent: Agent? { agentsDict.values.first } - public var hasAgent: Bool { !agentsDict.isEmpty } + @Published public private(set) var agent: Agent = .disconnected @Published private var messagesDict: OrderedDictionary = [:] public var messages: [ReceivedMessage] { messagesDict.values.elements } @@ -162,18 +153,14 @@ open class Session: ObservableObject { private func updateAgents(in room: Room) { let agentParticipants = room.agentParticipants - var newAgents: OrderedDictionary = [:] - - for (identity, participant) in agentParticipants { - if let existingAgent = agentsDict[identity] { - newAgents[identity] = existingAgent - } else { - let newAgent = Agent(participant: participant) - newAgents[identity] = newAgent - } + if agentParticipants.isEmpty, !agent.isConnected { + agent = .connecting + return } - agentsDict = newAgents + if let firstAgent = agentParticipants.values.first { + agent = .connected(participant: firstAgent) + } } private func observe(receivers: [any MessageReceiver]) { @@ -187,16 +174,6 @@ open class Session: ObservableObject { } } - // MARK: - Agents - - private func agent(named agentName: String) -> Agent? { - agentsDict.values.first { $0.participant.attributes[Self.agentNameAttribute] == agentName } - } - - private subscript(agentName: String) -> Agent? 
{ - agent(named: agentName) - } - // MARK: - Lifecycle public func start() async { @@ -212,8 +189,8 @@ open class Session: ObservableObject { try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) try Task.checkCancellation() guard let self else { return } - if connectionState == .connected, !hasAgent { - self.error = .agentNotConnected + if isConnected, !agent.isConnected { + self.agent = .failed(.timeout) } } } @@ -223,10 +200,9 @@ open class Session: ObservableObject { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { - await MainActor.run { self.bufferingSpeechLocally = true } + await MainActor.run { self.agent = .connected(.listening, nil, nil) } try await self.room.connect(url: response.serverURL.absoluteString, token: response.participantToken) - await MainActor.run { self.bufferingSpeechLocally = false } } } else { try await room.connect(url: response.serverURL.absoluteString, From 961b7c4c91a01c507c30e4fa89233863b5c3558d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Wed, 22 Oct 2025 16:28:48 +0200 Subject: [PATCH 24/45] Discussion: update logic from JS --- Sources/LiveKit/Agent/Agent.swift | 45 ++++++++++++++++++++++------- Sources/LiveKit/Agent/Session.swift | 19 ++++++------ 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 860607aad..0c3790db8 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -16,7 +16,7 @@ import Foundation -public enum Agent { +public struct Agent { public enum Error: LocalizedError { case timeout @@ -28,36 +28,59 @@ public enum Agent { } } - case disconnected - case connecting - case connected(AgentState, (any AudioTrack)?, (any VideoTrack)?) 
- case failed(Error) + public enum State { + case disconnected + case connecting + case connected(AgentState, (any AudioTrack)?, (any VideoTrack)?) + case failed(Error) + } + + public var state: State = .disconnected public var isConnected: Bool { - switch self { + switch state { case .connected: true default: false } } public var audioTrack: (any AudioTrack)? { - switch self { + switch state { case let .connected(_, audioTrack, _): audioTrack default: nil } } public var avatarVideoTrack: (any VideoTrack)? { - switch self { + switch state { case let .connected(_, _, avatarVideoTrack): avatarVideoTrack default: nil } } + public var error: Error? { + switch state { + case let .failed(error): error + default: nil + } + } + + static func connecting() -> Agent { + Agent(state: .connecting) + } + + static func failed(_ error: Error) -> Agent { + Agent(state: .failed(error)) + } + + static func listening() -> Agent { + Agent(state: .connected(.listening, nil, nil)) + } + static func connected(participant: Participant) -> Agent { - .connected(participant.agentState, - participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack, - participant.avatarWorker?.firstCameraVideoTrack) + Agent(state: .connected(participant.agentState, + participant.audioTracks.first(where: { $0.source == .microphone })?.track as? 
AudioTrack, + participant.avatarWorker?.firstCameraVideoTrack)) } } diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 913572e22..d4bd3b9af 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -52,7 +52,7 @@ open class Session: ObservableObject { } } - @Published public private(set) var agent: Agent = .disconnected + @Published public private(set) var agent = Agent() @Published private var messagesDict: OrderedDictionary = [:] public var messages: [ReceivedMessage] { messagesDict.values.elements } @@ -145,22 +145,23 @@ open class Session: ObservableObject { guard let self else { return } connectionState = room.connectionState - updateAgents(in: room) + agent = updatedAgent(in: room) } } } - private func updateAgents(in room: Room) { - let agentParticipants = room.agentParticipants + private func updatedAgent(in room: Room) -> Agent { + var agent = Agent() - if agentParticipants.isEmpty, !agent.isConnected { - agent = .connecting - return + if connectionState != .disconnected { + agent = .connecting() } - if let firstAgent = agentParticipants.values.first { + if let firstAgent = room.agentParticipants.values.first { agent = .connected(participant: firstAgent) } + + return agent } private func observe(receivers: [any MessageReceiver]) { @@ -200,7 +201,7 @@ open class Session: ObservableObject { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { - await MainActor.run { self.agent = .connected(.listening, nil, nil) } + await MainActor.run { self.agent = .listening() } try await self.room.connect(url: response.serverURL.absoluteString, token: response.participantToken) } From 15509a98294ca4607b081ff73a60c00aea48c595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Wed, 22 Oct 2025 16:32:47 +0200 Subject: [PATCH 25/45] Expose state again --- Sources/LiveKit/Agent/Agent.swift | 11 
+++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 0c3790db8..ff556bfee 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -28,14 +28,14 @@ public struct Agent { } } - public enum State { + enum State { case disconnected case connecting case connected(AgentState, (any AudioTrack)?, (any VideoTrack)?) case failed(Error) } - public var state: State = .disconnected + var state: State = .disconnected public var isConnected: Bool { switch state { @@ -44,6 +44,13 @@ public struct Agent { } } + public var agentState: AgentState? { + switch state { + case let .connected(agentState, _, _): agentState + default: nil + } + } + public var audioTrack: (any AudioTrack)? { switch state { case let .connected(_, audioTrack, _): audioTrack From 00d74cd2164f212f46b091a3cdac3b522fdab52b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Wed, 22 Oct 2025 16:35:25 +0200 Subject: [PATCH 26/45] Labels --- Sources/LiveKit/Agent/Agent.swift | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index ff556bfee..96e94628e 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -31,11 +31,15 @@ public struct Agent { enum State { case disconnected case connecting - case connected(AgentState, (any AudioTrack)?, (any VideoTrack)?) + case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?) 
case failed(Error) } - var state: State = .disconnected + let state: State + + init(state: State = .disconnected) { + self.state = state + } public var isConnected: Bool { switch state { @@ -81,13 +85,13 @@ public struct Agent { } static func listening() -> Agent { - Agent(state: .connected(.listening, nil, nil)) + Agent(state: .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil)) } static func connected(participant: Participant) -> Agent { - Agent(state: .connected(participant.agentState, - participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack, - participant.avatarWorker?.firstCameraVideoTrack)) + Agent(state: .connected(agentState: participant.agentState, + audioTrack: participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack, + avatarVideoTrack: participant.avatarWorker?.firstCameraVideoTrack)) } } From 24e6f18a73cfde6f7bc5c25a0748ad972b686fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 08:39:59 +0200 Subject: [PATCH 27/45] Move --- Sources/LiveKit/{Agent => SwiftUI}/LocalMedia.swift | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Sources/LiveKit/{Agent => SwiftUI}/LocalMedia.swift (100%) diff --git a/Sources/LiveKit/Agent/LocalMedia.swift b/Sources/LiveKit/SwiftUI/LocalMedia.swift similarity index 100% rename from Sources/LiveKit/Agent/LocalMedia.swift rename to Sources/LiveKit/SwiftUI/LocalMedia.swift From 8ebbc9c969a8181311acfa923e1b407406cfea8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 08:55:04 +0200 Subject: [PATCH 28/45] Mutable state with explicit transitions --- Sources/LiveKit/Agent/Agent.swift | 70 +++++++++++++++++++---------- Sources/LiveKit/Agent/Session.swift | 10 ++--- 2 files changed, 52 insertions(+), 28 deletions(-) diff --git 
a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 96e94628e..3eaa97192 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -16,7 +16,9 @@ import Foundation -public struct Agent { +public struct Agent: Loggable { + // MARK: - Error + public enum Error: LocalizedError { case timeout @@ -28,19 +30,59 @@ public struct Agent { } } - enum State { + // MARK: - State + + private enum State { case disconnected case connecting case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?) case failed(Error) } - let state: State + private var state: State = .disconnected + + // MARK: - Transitions + + mutating func connecting() { + switch state { + case .disconnected: + state = .connecting + default: + log("Invalid transition from \(state) to connecting", .warning) + } + } + + mutating func listening() { + switch state { + case .disconnected, .connecting: + state = .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil) + default: + log("Invalid transition from \(state) to listening", .warning) + } + } + + mutating func connected(participant: Participant) { + switch state { + case .connecting, .connected: + state = .connected(agentState: participant.agentState, + audioTrack: participant.audioTracks.first(where: { $0.source == .microphone })?.track as? 
AudioTrack, + avatarVideoTrack: participant.avatarWorker?.firstCameraVideoTrack) + default: + log("Invalid transition from \(state) to connected", .warning) + } + } - init(state: State = .disconnected) { - self.state = state + mutating func failed(_ error: Error) { + switch state { + case .disconnected, .connecting, .connected: + state = .failed(error) + default: + log("Invalid transition from \(state) to failed", .warning) + } } + // MARK: - Public + public var isConnected: Bool { switch state { case .connected: true @@ -75,24 +117,6 @@ public struct Agent { default: nil } } - - static func connecting() -> Agent { - Agent(state: .connecting) - } - - static func failed(_ error: Error) -> Agent { - Agent(state: .failed(error)) - } - - static func listening() -> Agent { - Agent(state: .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil)) - } - - static func connected(participant: Participant) -> Agent { - Agent(state: .connected(agentState: participant.agentState, - audioTrack: participant.audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack, - avatarVideoTrack: participant.avatarWorker?.firstCameraVideoTrack)) - } } extension AgentState: CustomStringConvertible { diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index d4bd3b9af..0764ba74d 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -38,7 +38,7 @@ open class Session: ObservableObject { } } - // MARK: - State + // MARK: - Published @Published public private(set) var error: Error? 
@@ -154,11 +154,11 @@ open class Session: ObservableObject { var agent = Agent() if connectionState != .disconnected { - agent = .connecting() + agent.connecting() } if let firstAgent = room.agentParticipants.values.first { - agent = .connected(participant: firstAgent) + agent.connected(participant: firstAgent) } return agent @@ -191,7 +191,7 @@ open class Session: ObservableObject { try Task.checkCancellation() guard let self else { return } if isConnected, !agent.isConnected { - self.agent = .failed(.timeout) + self.agent.failed(.timeout) } } } @@ -201,7 +201,7 @@ open class Session: ObservableObject { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { - await MainActor.run { self.agent = .listening() } + await MainActor.run { self.agent.listening() } try await self.room.connect(url: response.serverURL.absoluteString, token: response.participantToken) } From 095ffe76cb5ec534836be1e7f23fa05579a49ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:09:09 +0200 Subject: [PATCH 29/45] Ext --- Sources/LiveKit/Agent/Agent.swift | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 3eaa97192..a4c2f9dac 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -65,8 +65,8 @@ public struct Agent: Loggable { switch state { case .connecting, .connected: state = .connected(agentState: participant.agentState, - audioTrack: participant.audioTracks.first(where: { $0.source == .microphone })?.track as? 
AudioTrack, - avatarVideoTrack: participant.avatarWorker?.firstCameraVideoTrack) + audioTrack: participant.agentAudioTrack, + avatarVideoTrack: participant.avatarVideoTrack) default: log("Invalid transition from \(state) to connected", .warning) } @@ -119,6 +119,16 @@ public struct Agent: Loggable { } } +private extension Participant { + var agentAudioTrack: (any AudioTrack)? { + audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack + } + + var avatarVideoTrack: (any VideoTrack)? { + avatarWorker?.firstCameraVideoTrack + } +} + extension AgentState: CustomStringConvertible { public var description: String { rawValue.capitalized From a2654a0677412dfe1563e8cb2db721cb1dd36804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 10:23:59 +0200 Subject: [PATCH 30/45] State machine improvements --- Sources/LiveKit/Agent/Agent.swift | 28 +++++++++++-------- Sources/LiveKit/Agent/Session.swift | 20 ++++++------- .../LiveKit/Support/ObservableObject+.swift | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index a4c2f9dac..5b1d98b95 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -43,9 +43,22 @@ public struct Agent: Loggable { // MARK: - Transitions + mutating func disconnected() { + log("Agent disconnected from \(state)", .debug) + // From any state + state = .disconnected + } + + mutating func failed(_ error: Error) { + log("Agent failed with error \(error) from \(state)") + // From any state + state = .failed(error) + } + mutating func connecting() { + log("Agent connecting from \(state)") switch state { - case .disconnected: + case .disconnected, .connecting, .connected: // pre-connect is listening (connected) state = .connecting default: log("Invalid transition from \(state) to connecting", .warning) @@ -53,8 +66,9 @@ public struct 
Agent: Loggable { } mutating func listening() { + log("Agent listening from \(state)") switch state { - case .disconnected, .connecting: + case .disconnected: state = .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil) default: log("Invalid transition from \(state) to listening", .warning) @@ -62,6 +76,7 @@ public struct Agent: Loggable { } mutating func connected(participant: Participant) { + log("Agent connected to \(participant) from \(state)") switch state { case .connecting, .connected: state = .connected(agentState: participant.agentState, @@ -72,15 +87,6 @@ public struct Agent: Loggable { } } - mutating func failed(_ error: Error) { - switch state { - case .disconnected, .connecting, .connected: - state = .failed(error) - default: - log("Invalid transition from \(state) to failed", .warning) - } - } - // MARK: - Public public var isConnected: Bool { diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 0764ba74d..406b4e027 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -143,25 +143,21 @@ open class Session: ObservableObject { Task { [weak self] in for try await _ in room.changes { guard let self else { return } - - connectionState = room.connectionState - agent = updatedAgent(in: room) + updateAgent(in: room) } } } - private func updatedAgent(in room: Room) -> Agent { - var agent = Agent() - - if connectionState != .disconnected { - agent.connecting() - } + private func updateAgent(in room: Room) { + connectionState = room.connectionState - if let firstAgent = room.agentParticipants.values.first { + if connectionState == .disconnected { + agent.disconnected() + } else if let firstAgent = room.agentParticipants.values.first { agent.connected(participant: firstAgent) + } else { + agent.connecting() } - - return agent } private func observe(receivers: [any MessageReceiver]) { diff --git a/Sources/LiveKit/Support/ObservableObject+.swift 
b/Sources/LiveKit/Support/ObservableObject+.swift index 3da521406..8be4f1558 100644 --- a/Sources/LiveKit/Support/ObservableObject+.swift +++ b/Sources/LiveKit/Support/ObservableObject+.swift @@ -17,7 +17,7 @@ @preconcurrency import Combine extension ObservableObject { - /// An async sequence that emits the `objectWillChange` events. + /// An async stream that emits the `objectWillChange` events. var changes: AsyncStream { AsyncStream { continuation in let cancellable = objectWillChange.sink { _ in From 44e2af98f7e42fcaf69ab3ed8c86d3d3a7fb1aea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:00:08 +0200 Subject: [PATCH 31/45] Cmts --- Sources/LiveKit/Agent/Agent.swift | 23 +++++++- .../Agent/Chat/Receive/MessageReceiver.swift | 2 - .../Agent/Chat/Send/MessageSender.swift | 2 - Sources/LiveKit/Agent/Session.swift | 56 +++++++++++++++++++ Sources/LiveKit/Agent/SessionOptions.swift | 8 +++ Sources/LiveKit/SwiftUI/LocalMedia.swift | 33 +++++++++++ 6 files changed, 118 insertions(+), 6 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index 5b1d98b95..cbbdd7a01 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -16,6 +16,20 @@ import Foundation +/// Represents a LiveKit Agent. +/// +/// The ``Agent`` struct represents the state of a LiveKit agent within a ``Session``. +/// It provides information about the agent's connection status, its current state +/// (e.g., listening, thinking, speaking), and its media tracks. +/// +/// The ``Agent``'s properties are updated automatically by the ``Session`` as the agent's +/// state changes. This allows the application to react to the agent's +/// behavior, such as displaying its avatar video or indicating when it is speaking. +/// The ``agentState`` property is particularly useful for building UIs that reflect +/// the agent's current activity. 
+/// +/// - SeeAlso: [LiveKit SwiftUI Agent Starter](https://github.com/livekit-examples/agent-starter-swift). +/// - SeeAlso: [LiveKit Agents documentation](https://docs.livekit.io/agents/). public struct Agent: Loggable { // MARK: - Error @@ -25,7 +39,7 @@ public struct Agent: Loggable { public var errorDescription: String? { switch self { case .timeout: - "Agent not connected" + "Agent did not connect" } } } @@ -44,7 +58,7 @@ public struct Agent: Loggable { // MARK: - Transitions mutating func disconnected() { - log("Agent disconnected from \(state)", .debug) + log("Agent disconnected from \(state)") // From any state state = .disconnected } @@ -89,6 +103,7 @@ public struct Agent: Loggable { // MARK: - Public + /// A boolean value indicating whether the agent is connected. public var isConnected: Bool { switch state { case .connected: true @@ -96,6 +111,7 @@ public struct Agent: Loggable { } } + /// The current conversational state of the agent. public var agentState: AgentState? { switch state { case let .connected(agentState, _, _): agentState @@ -103,6 +119,7 @@ public struct Agent: Loggable { } } + /// The agent's audio track. public var audioTrack: (any AudioTrack)? { switch state { case let .connected(_, audioTrack, _): audioTrack @@ -110,6 +127,7 @@ public struct Agent: Loggable { } } + /// The agent's avatar video track. public var avatarVideoTrack: (any VideoTrack)? { switch state { case let .connected(_, _, avatarVideoTrack): avatarVideoTrack @@ -117,6 +135,7 @@ public struct Agent: Loggable { } } + /// The last error that occurred. public var error: Error? 
{ switch state { case let .failed(error): error diff --git a/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift index 2344be30e..20b7edec4 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/MessageReceiver.swift @@ -20,8 +20,6 @@ import Foundation /// /// A message receiver is responsible for creating a stream of messages from the agent. /// It is used to receive messages from the agent and update the message feed. -/// -/// - SeeAlso: ``ReceivedMessage`` public protocol MessageReceiver: Sendable { func messages() async throws -> AsyncStream } diff --git a/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift b/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift index fe78232c0..a9bdc86fb 100644 --- a/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift +++ b/Sources/LiveKit/Agent/Chat/Send/MessageSender.swift @@ -20,8 +20,6 @@ import Foundation /// /// A message sender is responsible for sending messages to the agent. /// It is used to send messages to the agent and update the message feed. -/// -/// - SeeAlso: ``SentMessage`` public protocol MessageSender: Sendable { func send(_ message: SentMessage) async throws } diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 406b4e027..f964633a6 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -18,6 +18,24 @@ import Combine import Foundation import OrderedCollections +/// A ``Session`` represents a connection to a LiveKit Room that can contain an ``Agent``. +/// +/// ``Session`` is the main entry point for interacting with a LiveKit agent. It encapsulates +/// the connection to a LiveKit ``Room``, manages the agent's lifecycle, and handles +/// communication between the user and the agent. +/// +/// ``Session`` is created with a token source and optional configuration. 
The ``start()`` +/// method establishes the connection, and the ``end()`` method terminates it. The session's +/// state, including connection status and any errors, is published for observation, +/// making it suitable for use in SwiftUI applications. +/// +/// Communication with the agent is handled through messages. The ``send(text:)`` method +/// sends a user message, and the ``messages`` property provides an ordered history of the +/// conversation. The session can be configured with custom message senders and receivers +/// to support different communication channels, such as text messages or transcription streams. +/// +/// - SeeAlso: [LiveKit SwiftUI Agent Starter](https://github.com/livekit-examples/agent-starter-swift). +/// - SeeAlso: [LiveKit Agents documentation](https://docs.livekit.io/agents/). @MainActor open class Session: ObservableObject { private static let agentNameAttribute = "lk.agent_name" @@ -40,9 +58,12 @@ open class Session: ObservableObject { // MARK: - Published + /// The last error that occurred. @Published public private(set) var error: Error? + /// The current connection state of the session. @Published public private(set) var connectionState: ConnectionState = .disconnected + /// A boolean value indicating whether the session is connected. public var isConnected: Bool { switch connectionState { case .connecting, .connected: @@ -52,13 +73,16 @@ open class Session: ObservableObject { } } + /// The ``Agent`` associated with this session. @Published public private(set) var agent = Agent() @Published private var messagesDict: OrderedDictionary = [:] + /// The ordered list of received messages. public var messages: [ReceivedMessage] { messagesDict.values.elements } // MARK: - Dependencies + /// The underlying ``Room`` object for the session. 
public let room: Room private enum TokenSourceConfiguration { @@ -98,6 +122,12 @@ open class Session: ObservableObject { observe(receivers: resolvedReceivers) } + /// Initializes a new ``Session`` with a fixed token source. + /// - Parameters: + /// - tokenSource: A token source that provides a fixed token. + /// - options: The session options. + /// - senders: An array of message senders. + /// - receivers: An array of message receivers. public convenience init(tokenSource: any TokenSourceFixed, options: SessionOptions = .init(), senders: [any MessageSender]? = nil, @@ -109,6 +139,13 @@ open class Session: ObservableObject { receivers: receivers) } + /// Initializes a new ``Session`` with a configurable token source. + /// - Parameters: + /// - tokenSource: A token source that can generate tokens with specific options. + /// - tokenOptions: The options for generating the token. + /// - options: The session options. + /// - senders: An array of message senders. + /// - receivers: An array of message receivers. public convenience init(tokenSource: any TokenSourceConfigurable, tokenOptions: TokenRequestOptions = .init(), options: SessionOptions = .init(), @@ -121,6 +158,15 @@ open class Session: ObservableObject { receivers: receivers) } + /// Creates a new ``Session`` configured for a specific agent. + /// - Parameters: + /// - agentName: The name of the agent to dispatch. + /// - agentMetadata: Metadata passed to the agent. + /// - tokenSource: A configurable token source. + /// - options: The session options. + /// - senders: An array of message senders. + /// - receivers: An array of message receivers. + /// - Returns: A new ``Session`` instance. public static func withAgent(_ agentName: String, agentMetadata: String? = nil, tokenSource: any TokenSourceConfigurable, @@ -173,6 +219,7 @@ open class Session: ObservableObject { // MARK: - Lifecycle + /// Starts the session. 
public func start() async { guard connectionState == .disconnected else { return } @@ -210,16 +257,21 @@ open class Session: ObservableObject { } } + /// Terminates the session. public func end() async { await room.disconnect() } + /// Resets the last error. public func resetError() { error = nil } // MARK: - Messages + /// Sends a text message. + /// - Parameter text: The text to send. + /// - Returns: The ``SentMessage`` that was sent. @discardableResult public func send(text: String) async -> SentMessage { let message = SentMessage(id: UUID().uuidString, timestamp: Date(), content: .userInput(text)) @@ -233,10 +285,14 @@ open class Session: ObservableObject { return message } + /// Gets the message history. + /// - Returns: An array of ``ReceivedMessage``. public func getMessageHistory() -> [ReceivedMessage] { messages } + /// Restores the message history. + /// - Parameter messages: An array of ``ReceivedMessage`` to restore. public func restoreMessageHistory(_ messages: [ReceivedMessage]) { messagesDict = .init(uniqueKeysWithValues: messages.sorted(by: { $0.timestamp < $1.timestamp }).map { ($0.id, $0) }) } diff --git a/Sources/LiveKit/Agent/SessionOptions.swift b/Sources/LiveKit/Agent/SessionOptions.swift index 1c86aa61c..96bddf01c 100644 --- a/Sources/LiveKit/Agent/SessionOptions.swift +++ b/Sources/LiveKit/Agent/SessionOptions.swift @@ -16,9 +16,17 @@ import Foundation +/// Options for creating a ``Session``. public struct SessionOptions: Sendable { + /// The underlying ``Room`` object for the session. public var room: Room + /// Whether to enable audio pre-connect with ``PreConnectAudioBuffer``. + /// If enabled, the microphone will be enabled before connecting to the room. + /// Use ``LocalMedia`` or ``AudioManager/setRecordingAlwaysPreparedMode(_:)`` + /// to request microphone permissions early in the app lifecycle. public var preConnectAudio: Bool + /// The timeout for the agent to connect, in seconds. 
+ /// If exceeded, the ``Agent`` will transition to a failed state. public var agentConnectTimeout: TimeInterval public init( diff --git a/Sources/LiveKit/SwiftUI/LocalMedia.swift b/Sources/LiveKit/SwiftUI/LocalMedia.swift index fe2c09ed3..92a24cde7 100644 --- a/Sources/LiveKit/SwiftUI/LocalMedia.swift +++ b/Sources/LiveKit/SwiftUI/LocalMedia.swift @@ -18,6 +18,11 @@ import Combine import Foundation +/// An ``ObservableObject`` that can be used to control the local participant's media devices. +/// +/// This class provides a convenient way to manage local media tracks, including enabling/disabling +/// microphone and camera, and selecting audio and video devices. It is designed to be used +/// in SwiftUI views. @MainActor open class LocalMedia: ObservableObject { // MARK: - Error @@ -35,22 +40,34 @@ open class LocalMedia: ObservableObject { // MARK: - Devices + /// The last error that occurred. @Published public private(set) var error: Error? + /// The local microphone track. @Published public private(set) var microphoneTrack: (any AudioTrack)? + /// The local camera track. @Published public private(set) var cameraTrack: (any VideoTrack)? + /// The local screen share track. @Published public private(set) var screenShareTrack: (any VideoTrack)? + /// A boolean value indicating whether the microphone is enabled. @Published public private(set) var isMicrophoneEnabled: Bool = false + /// A boolean value indicating whether the camera is enabled. @Published public private(set) var isCameraEnabled: Bool = false + /// A boolean value indicating whether screen sharing is enabled. @Published public private(set) var isScreenShareEnabled: Bool = false + /// The available audio input devices. @Published public private(set) var audioDevices: [AudioDevice] = AudioManager.shared.inputDevices + /// The ID of the selected audio input device. 
@Published public private(set) var selectedAudioDeviceID: String = AudioManager.shared.inputDevice.deviceId + /// The available video capture devices. @Published public private(set) var videoDevices: [AVCaptureDevice] = [] + /// The ID of the selected video capture device. @Published public private(set) var selectedVideoDeviceID: String? + /// A boolean value indicating whether the camera position can be switched. @Published public private(set) var canSwitchCamera = false // MARK: - Dependencies @@ -59,6 +76,8 @@ open class LocalMedia: ObservableObject { // MARK: - Initialization + /// Initializes a new ``LocalMedia`` object. + /// - Parameter localParticipant: The ``LocalParticipant`` to control. public init(localParticipant: LocalParticipant) { self.localParticipant = localParticipant @@ -66,10 +85,14 @@ open class LocalMedia: ObservableObject { observeDevices() } + /// Initializes a new ``LocalMedia`` object. + /// - Parameter room: The ``Room`` to control. public convenience init(room: Room) { self.init(localParticipant: room.localParticipant) } + /// Initializes a new ``LocalMedia`` object. + /// - Parameter session: The ``Session`` to control. public convenience init(session: Session) { self.init(room: session.room) } @@ -116,6 +139,7 @@ open class LocalMedia: ObservableObject { // MARK: - Toggle + /// Toggles the microphone on or off. public func toggleMicrophone() async { do { try await localParticipant.setMicrophone(enabled: !isMicrophoneEnabled) @@ -124,6 +148,8 @@ open class LocalMedia: ObservableObject { } } + /// Toggles the camera on or off. + /// - Parameter disableScreenShare: If `true`, screen sharing will be disabled when the camera is enabled. public func toggleCamera(disableScreenShare: Bool = false) async { let enable = !isCameraEnabled do { @@ -138,6 +164,8 @@ open class LocalMedia: ObservableObject { } } + /// Toggles screen sharing on or off. 
+ /// - Parameter disableCamera: If `true`, the camera will be disabled when screen sharing is enabled. public func toggleScreenShare(disableCamera: Bool = false) async { let enable = !isScreenShareEnabled do { @@ -152,6 +180,8 @@ open class LocalMedia: ObservableObject { // MARK: - Select + /// Selects an audio input device. + /// - Parameter audioDevice: The ``AudioDevice`` to select. public func select(audioDevice: AudioDevice) { selectedAudioDeviceID = audioDevice.deviceId @@ -159,6 +189,8 @@ open class LocalMedia: ObservableObject { AudioManager.shared.inputDevice = device } + /// Selects a video capture device. + /// - Parameter videoDevice: The ``AVCaptureDevice`` to select. public func select(videoDevice: AVCaptureDevice) async { selectedVideoDeviceID = videoDevice.uniqueID @@ -167,6 +199,7 @@ open class LocalMedia: ObservableObject { _ = try? await cameraCapturer.set(options: captureOptions) } + /// Switches the camera position. public func switchCamera() async { guard let cameraCapturer = getCameraCapturer() else { return } _ = try? await cameraCapturer.switchCameraPosition() From 922058720867fc572cc21db407420bb02c5d6510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:58:40 +0200 Subject: [PATCH 32/45] Reconnect --- Sources/LiveKit/Agent/Session.swift | 4 ++-- Sources/LiveKit/SwiftUI/LocalMedia.swift | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index f964633a6..516f87b64 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -66,7 +66,7 @@ open class Session: ObservableObject { /// A boolean value indicating whether the session is connected. 
public var isConnected: Bool { switch connectionState { - case .connecting, .connected: + case .connecting, .connected, .reconnecting: // pre-connect is connecting true default: false @@ -263,7 +263,7 @@ open class Session: ObservableObject { } /// Resets the last error. - public func resetError() { + public func dismissError() { error = nil } diff --git a/Sources/LiveKit/SwiftUI/LocalMedia.swift b/Sources/LiveKit/SwiftUI/LocalMedia.swift index 92a24cde7..86c4be121 100644 --- a/Sources/LiveKit/SwiftUI/LocalMedia.swift +++ b/Sources/LiveKit/SwiftUI/LocalMedia.swift @@ -137,6 +137,11 @@ open class LocalMedia: ObservableObject { AudioManager.shared.onDeviceUpdate = nil } + /// Resets the last error. + public func dismissError() { + error = nil + } + // MARK: - Toggle /// Toggles the microphone on or off. From 624d58055f2a90999fb712a7c9f8283de4567d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 12:06:13 +0200 Subject: [PATCH 33/45] Change --- .changes/agent-session | 1 + 1 file changed, 1 insertion(+) create mode 100644 .changes/agent-session diff --git a/.changes/agent-session b/.changes/agent-session new file mode 100644 index 000000000..dcadce57b --- /dev/null +++ b/.changes/agent-session @@ -0,0 +1 @@ +minor type="added" "Agent and Session APIs for creating agent-based apps" From 6cb1a3857bda43588fc565e58d80213501ccd81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 12:14:12 +0200 Subject: [PATCH 34/45] Hide room conn state --- Sources/LiveKit/Agent/Session.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 516f87b64..3426344b9 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -62,7 +62,7 @@ open class Session: ObservableObject { 
@Published public private(set) var error: Error? /// The current connection state of the session. - @Published public private(set) var connectionState: ConnectionState = .disconnected + @Published private var connectionState: ConnectionState = .disconnected /// A boolean value indicating whether the session is connected. public var isConnected: Bool { switch connectionState { From 565db30bbf68314ac1e23f07edba06c96cc89c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:34:58 +0200 Subject: [PATCH 35/45] Move token inside preconnect --- Sources/LiveKit/Agent/Session.swift | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 3426344b9..114353861 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -239,18 +239,20 @@ open class Session: ObservableObject { } } - do { - let response = try await fetchToken() + let connect = { @Sendable in + let response = try await self.fetchToken() + try await self.room.connect(url: response.serverURL.absoluteString, + token: response.participantToken) + } + do { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { await MainActor.run { self.agent.listening() } - try await self.room.connect(url: response.serverURL.absoluteString, - token: response.participantToken) + try await connect() } } else { - try await room.connect(url: response.serverURL.absoluteString, - token: response.participantToken) + try await connect() } } catch { self.error = .failedToConnect(error) From 160f8fb4a86fc3def95b7f71e1ecaf26adf051a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 13:52:07 +0200 Subject: [PATCH 36/45] Fix pre-connect gap --- Sources/LiveKit/Agent/Session.swift | 5 ++++- 1 file 
changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 114353861..e3343c63c 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -248,7 +248,10 @@ open class Session: ObservableObject { do { if options.preConnectAudio { try await room.withPreConnectAudio(timeout: timeout) { - await MainActor.run { self.agent.listening() } + await MainActor.run { + self.connectionState = .connecting + self.agent.listening() + } try await connect() } } else { From a787606a2e68c926df855adc5e2b4d3fb01a3614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:01:00 +0200 Subject: [PATCH 37/45] Simplify pre-connect state --- Sources/LiveKit/Agent/Agent.swift | 24 +++++++----------------- Sources/LiveKit/Agent/Session.swift | 6 +++--- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/Sources/LiveKit/Agent/Agent.swift b/Sources/LiveKit/Agent/Agent.swift index cbbdd7a01..27a9d2166 100644 --- a/Sources/LiveKit/Agent/Agent.swift +++ b/Sources/LiveKit/Agent/Agent.swift @@ -48,9 +48,9 @@ public struct Agent: Loggable { private enum State { case disconnected - case connecting + case connecting(buffering: Bool) case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?) 
- case failed(Error) + case failed(error: Error) } private var state: State = .disconnected @@ -63,32 +63,22 @@ public struct Agent: Loggable { state = .disconnected } - mutating func failed(_ error: Error) { + mutating func failed(error: Error) { log("Agent failed with error \(error) from \(state)") // From any state - state = .failed(error) + state = .failed(error: error) } - mutating func connecting() { + mutating func connecting(buffering: Bool) { log("Agent connecting from \(state)") switch state { - case .disconnected, .connecting, .connected: // pre-connect is listening (connected) - state = .connecting + case .disconnected, .connecting: + state = .connecting(buffering: buffering) default: log("Invalid transition from \(state) to connecting", .warning) } } - mutating func listening() { - log("Agent listening from \(state)") - switch state { - case .disconnected: - state = .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil) - default: - log("Invalid transition from \(state) to listening", .warning) - } - } - mutating func connected(participant: Participant) { log("Agent connected to \(participant) from \(state)") switch state { diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index e3343c63c..b677be3f6 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -202,7 +202,7 @@ open class Session: ObservableObject { } else if let firstAgent = room.agentParticipants.values.first { agent.connected(participant: firstAgent) } else { - agent.connecting() + agent.connecting(buffering: options.preConnectAudio) } } @@ -234,7 +234,7 @@ open class Session: ObservableObject { try Task.checkCancellation() guard let self else { return } if isConnected, !agent.isConnected { - self.agent.failed(.timeout) + self.agent.failed(error: .timeout) } } } @@ -250,7 +250,7 @@ open class Session: ObservableObject { try await room.withPreConnectAudio(timeout: timeout) { await MainActor.run { 
self.connectionState = .connecting - self.agent.listening() + self.agent.connecting(buffering: true) } try await connect() } From 7a714531497013e2cc9365e9e2646254ff087017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:10:40 +0200 Subject: [PATCH 38/45] Enable mic w/o preconnect --- Sources/LiveKit/Agent/Session.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index b677be3f6..988ab39e4 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -255,7 +255,9 @@ open class Session: ObservableObject { try await connect() } } else { + agent.connecting(buffering: false) try await connect() + try await room.localParticipant.setMicrophone(enabled: true) } } catch { self.error = .failedToConnect(error) From 585036b367ed23281222082bb696b2d06ce235a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 08:45:55 +0200 Subject: [PATCH 39/45] Revert states on catch --- Sources/LiveKit/Agent/Session.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 988ab39e4..799b5e788 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -255,12 +255,15 @@ open class Session: ObservableObject { try await connect() } } else { + connectionState = .connecting agent.connecting(buffering: false) try await connect() try await room.localParticipant.setMicrophone(enabled: true) } } catch { self.error = .failedToConnect(error) + connectionState = .disconnected + agent.disconnected() } } From 385d2bb28400c8de599bfbd541eedaa876d471e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:04:28 
+0200 Subject: [PATCH 40/45] Move wait --- Sources/LiveKit/Agent/Session.swift | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 799b5e788..3158ded98 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -228,17 +228,6 @@ open class Session: ObservableObject { let timeout = options.agentConnectTimeout - defer { - waitForAgentTask = Task { [weak self] in - try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) - try Task.checkCancellation() - guard let self else { return } - if isConnected, !agent.isConnected { - self.agent.failed(error: .timeout) - } - } - } - let connect = { @Sendable in let response = try await self.fetchToken() try await self.room.connect(url: response.serverURL.absoluteString, @@ -260,6 +249,15 @@ open class Session: ObservableObject { try await connect() try await room.localParticipant.setMicrophone(enabled: true) } + + waitForAgentTask = Task { [weak self] in + try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC))) + try Task.checkCancellation() + guard let self else { return } + if isConnected, !agent.isConnected { + agent.failed(error: .timeout) + } + } } catch { self.error = .failedToConnect(error) connectionState = .disconnected From 68464148d181c16665858cb7fcc182e583a56913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:10:04 +0200 Subject: [PATCH 41/45] Catch receiver errors --- Sources/LiveKit/Agent/Session.swift | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 3158ded98..1c5175261 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -45,6 +45,7 @@ open class Session: ObservableObject { public enum 
Error: LocalizedError { case failedToConnect(Swift.Error) case failedToSend(Swift.Error) + case receiverError(Swift.Error) public var errorDescription: String? { switch self { @@ -52,6 +53,8 @@ open class Session: ObservableObject { "Failed to connect: \(error.localizedDescription)" case let .failedToSend(error): "Failed to send: \(error.localizedDescription)" + case let .receiverError(error): + "Message receiver failed: \(error.localizedDescription)" } } } @@ -209,9 +212,13 @@ open class Session: ObservableObject { private func observe(receivers: [any MessageReceiver]) { for receiver in receivers { Task { [weak self] in - for await message in try await receiver.messages() { - guard let self else { return } - messagesDict.updateValue(message, forKey: message.id) + do { + for await message in try await receiver.messages() { + guard let self else { return } + messagesDict.updateValue(message, forKey: message.id) + } + } catch { + self?.error = .receiverError(error) } } } From 99e74b8a900677dc9e718f7a54c8b3684c937ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:20:21 +0200 Subject: [PATCH 42/45] Video device error handling --- Sources/LiveKit/SwiftUI/LocalMedia.swift | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Sources/LiveKit/SwiftUI/LocalMedia.swift b/Sources/LiveKit/SwiftUI/LocalMedia.swift index 86c4be121..e637001f9 100644 --- a/Sources/LiveKit/SwiftUI/LocalMedia.swift +++ b/Sources/LiveKit/SwiftUI/LocalMedia.swift @@ -197,11 +197,13 @@ open class LocalMedia: ObservableObject { /// Selects a video capture device. /// - Parameter videoDevice: The ``AVCaptureDevice`` to select. public func select(videoDevice: AVCaptureDevice) async { - selectedVideoDeviceID = videoDevice.uniqueID - guard let cameraCapturer = getCameraCapturer() else { return } - let captureOptions = CameraCaptureOptions(device: videoDevice) - _ = try? 
await cameraCapturer.set(options: captureOptions) + do { + try await cameraCapturer.set(options: .init(device: videoDevice)) + selectedVideoDeviceID = videoDevice.uniqueID + } catch { + self.error = .mediaDevice(error) + } } /// Switches the camera position. From d8cf47db57f3f998cf6465b03ad753d9a91f4297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:03:23 +0200 Subject: [PATCH 43/45] Rename errors --- Sources/LiveKit/Agent/Session.swift | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 1c5175261..23bfcc49b 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -43,17 +43,17 @@ open class Session: ObservableObject { // MARK: - Error public enum Error: LocalizedError { - case failedToConnect(Swift.Error) - case failedToSend(Swift.Error) - case receiverError(Swift.Error) + case connection(Swift.Error) + case sender(Swift.Error) + case receiver(Swift.Error) public var errorDescription: String? 
{ switch self { - case let .failedToConnect(error): - "Failed to connect: \(error.localizedDescription)" - case let .failedToSend(error): - "Failed to send: \(error.localizedDescription)" - case let .receiverError(error): + case let .connection(error): + "Connection failed: \(error.localizedDescription)" + case let .sender(error): + "Message sender failed: \(error.localizedDescription)" + case let .receiver(error): "Message receiver failed: \(error.localizedDescription)" } } @@ -218,7 +218,7 @@ open class Session: ObservableObject { messagesDict.updateValue(message, forKey: message.id) } } catch { - self?.error = .receiverError(error) + self?.error = .receiver(error) } } } @@ -266,7 +266,7 @@ open class Session: ObservableObject { } } } catch { - self.error = .failedToConnect(error) + self.error = .connection(error) connectionState = .disconnected agent.disconnected() } @@ -295,7 +295,7 @@ open class Session: ObservableObject { try await sender.send(message) } } catch { - self.error = .failedToSend(error) + self.error = .sender(error) } return message } From b32b5dbfb5265627f0463513d9de3fc79d4c2382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:07:20 +0200 Subject: [PATCH 44/45] Optional message return --- Sources/LiveKit/Agent/Session.swift | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Sources/LiveKit/Agent/Session.swift b/Sources/LiveKit/Agent/Session.swift index 23bfcc49b..b8ba08af0 100644 --- a/Sources/LiveKit/Agent/Session.swift +++ b/Sources/LiveKit/Agent/Session.swift @@ -286,18 +286,19 @@ open class Session: ObservableObject { /// Sends a text message. /// - Parameter text: The text to send. - /// - Returns: The ``SentMessage`` that was sent. + /// - Returns: The ``SentMessage`` that was sent, or `nil` if the message failed to send. 
@discardableResult - public func send(text: String) async -> SentMessage { + public func send(text: String) async -> SentMessage? { let message = SentMessage(id: UUID().uuidString, timestamp: Date(), content: .userInput(text)) do { for sender in senders { try await sender.send(message) } + return message } catch { self.error = .sender(error) + return nil } - return message } /// Gets the message history. From ce657d4aaa472a6a76129dbcf0146dcdb3ae9dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82az=CC=87ej=20Pankowski?= <86720177+pblazej@users.noreply.github.com> Date: Fri, 24 Oct 2025 15:28:01 +0200 Subject: [PATCH 45/45] Finish on deinit --- .../Agent/Chat/Receive/TranscriptionDelegateReceiver.swift | 1 + Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift index 43c8bfe1a..c9b24bb1d 100644 --- a/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift +++ b/Sources/LiveKit/Agent/Chat/Receive/TranscriptionDelegateReceiver.swift @@ -37,6 +37,7 @@ actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate { } deinit { + continuation?.finish() room.remove(delegate: self) } diff --git a/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift index 3fcfc87e0..d1c7e6a86 100644 --- a/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift +++ b/Sources/LiveKit/Agent/Chat/Send/TextMessageSender.swift @@ -33,6 +33,10 @@ actor TextMessageSender: MessageSender, MessageReceiver { self.topic = topic } + deinit { + messageContinuation?.finish() + } + func send(_ message: SentMessage) async throws { guard case let .userInput(text) = message.content else { return }