- 
                Notifications
    You must be signed in to change notification settings 
- Fork 154
Session API #789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Session API #789
Changes from 46 commits
bc411aa
              43861e8
              314b1c1
              93f3081
              319798f
              d4a496e
              dfa4db6
              54acf68
              bec88b6
              33b02f9
              d141d6a
              0e49e6f
              7b954da
              06609c1
              ac90eb4
              d84ef9b
              4fcd651
              d5e6437
              9cf68ff
              ae38145
              2327745
              ddf20a5
              89e25f5
              a93fcf0
              961b7c4
              15509a9
              00d74cd
              24e6f18
              8ebbc9c
              095ffe7
              a2654a0
              44e2af9
              9220587
              624d580
              6cb1a38
              565db30
              160f8fb
              a787606
              7a71453
              585036b
              385d2bb
              6846414
              99e74b8
              d8cf47d
              b32b5db
              ce657d4
              ead1a44
              b20d1dc
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1 @@ | ||
| minor type="added" "Agent and Session APIs for creating agent-based apps" | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,151 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|  | ||
| import Foundation | ||
|  | ||
| /// Represents a LiveKit Agent. | ||
| /// | ||
| /// The ``Agent`` struct represents the state of a LiveKit agent within a ``Session``. | ||
| /// It provides information about the agent's connection status, its current state | ||
| /// (e.g., listening, thinking, speaking), and its media tracks. | ||
| /// | ||
| /// The ``Agent``'s properties are updated automatically by the ``Session`` as the agent's | ||
| /// state changes. This allows the application to react to the agent's | ||
| /// behavior, such as displaying its avatar video or indicating when it is speaking. | ||
| /// The ``agentState`` property is particularly useful for building UIs that reflect | ||
| /// the agent's current activity. | ||
| /// | ||
| /// - SeeAlso: [LiveKit SwiftUI Agent Starter](https://github.com/livekit-examples/agent-starter-swift). | ||
| /// - SeeAlso: [LiveKit Agents documentation](https://docs.livekit.io/agents/). | ||
| public struct Agent: Loggable { | ||
| // MARK: - Error | ||
|  | ||
| /// Errors that can be stored in the agent's failed state and read back through ``error``. | ||
| public enum Error: LocalizedError { | ||
| // NOTE(review): presumably reported when the agent does not join within some deadline; | ||
| // the code that raises it is not part of this type - confirm against the caller. | ||
| case timeout | ||
|  | ||
| public var errorDescription: String? { | ||
| switch self { | ||
| case .timeout: | ||
| "Agent did not connect" | ||
| } | ||
| } | ||
| } | ||
|  | ||
| // MARK: - State | ||
|  | ||
| /// Internal lifecycle of the agent. Every public accessor below is derived from this value. | ||
| private enum State { | ||
| case disconnected | ||
| // `buffering` is stored here but is not read by any accessor in this type. | ||
| case connecting(buffering: Bool) | ||
| case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?) | ||
| case failed(error: Error) | ||
| } | ||
|  | ||
| private var state: State = .disconnected | ||
|  | ||
| // MARK: - Transitions | ||
|  | ||
| /// Moves the agent to `.disconnected`. Allowed from any state. | ||
| mutating func disconnected() { | ||
|          
                  pblazej marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| log("Agent disconnected from \(state)") | ||
| // From any state | ||
| state = .disconnected | ||
| } | ||
|  | ||
| /// Moves the agent to `.failed`, storing `error`. Allowed from any state. | ||
| mutating func failed(error: Error) { | ||
| log("Agent failed with error \(error) from \(state)") | ||
| // From any state | ||
| state = .failed(error: error) | ||
| } | ||
|  | ||
| /// Moves the agent to `.connecting` with the given `buffering` flag. | ||
| /// | ||
| /// Only applied when the current state is `.disconnected` or `.connecting`; from any other | ||
| /// state (including `.failed`) the state is left unchanged and a warning is logged. | ||
| mutating func connecting(buffering: Bool) { | ||
| log("Agent connecting from \(state)") | ||
| switch state { | ||
| case .disconnected, .connecting: | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there's no  | ||
| state = .connecting(buffering: buffering) | ||
| default: | ||
| log("Invalid transition from \(state) to connecting", .warning) | ||
| } | ||
| } | ||
|  | ||
| /// Moves the agent to `.connected`, capturing the participant's agent state, audio track | ||
| /// and avatar video track at the time of the call. | ||
| /// | ||
| /// Only applied when the current state is `.connecting` or `.connected` (the latter refreshes | ||
| /// the captured values); from any other state the state is left unchanged and a warning is logged. | ||
| mutating func connected(participant: Participant) { | ||
| log("Agent connected to \(participant) from \(state)") | ||
| switch state { | ||
| case .connecting, .connected: | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the perspective of UI framework, it does not really matter (for perf) https://medium.com/airbnb-engineering/understanding-and-improving-swiftui-performance-37b77ac61896 as we use the non-equatable (default) comparison for the  | ||
| state = .connected(agentState: participant.agentState, | ||
| audioTrack: participant.agentAudioTrack, | ||
| avatarVideoTrack: participant.avatarVideoTrack) | ||
| default: | ||
| log("Invalid transition from \(state) to connected", .warning) | ||
| } | ||
| } | ||
|  | ||
| // MARK: - Public | ||
|  | ||
| /// A boolean value indicating whether the agent is connected. | ||
| public var isConnected: Bool { | ||
| switch state { | ||
| case .connected: true | ||
| default: false | ||
| } | ||
| } | ||
|  | ||
| /// The current conversational state of the agent. | ||
| /// | ||
| /// `nil` unless the agent is connected. | ||
| public var agentState: AgentState? { | ||
| switch state { | ||
| case let .connected(agentState, _, _): agentState | ||
| default: nil | ||
| } | ||
| } | ||
|  | ||
| /// The agent's audio track. | ||
| /// | ||
| /// `nil` when the agent is not connected or no audio track was captured on connection. | ||
| public var audioTrack: (any AudioTrack)? { | ||
| switch state { | ||
| case let .connected(_, audioTrack, _): audioTrack | ||
| default: nil | ||
| } | ||
| } | ||
|  | ||
| /// The agent's avatar video track. | ||
| /// | ||
| /// `nil` when the agent is not connected or no avatar video track was captured on connection. | ||
| public var avatarVideoTrack: (any VideoTrack)? { | ||
| switch state { | ||
| case let .connected(_, _, avatarVideoTrack): avatarVideoTrack | ||
| default: nil | ||
| } | ||
| } | ||
|  | ||
| /// The last error that occurred. | ||
| /// | ||
| /// Non-`nil` only while the agent is in the failed state; any later transition clears it. | ||
| public var error: Error? { | ||
| switch state { | ||
| case let .failed(error): error | ||
| default: nil | ||
| } | ||
| } | ||
| } | ||
|  | ||
| private extension Participant { | ||
| /// The track of the first audio publication whose source is the microphone, | ||
| /// or `nil` when there is no such publication or its track is not an `AudioTrack`. | ||
| var agentAudioTrack: (any AudioTrack)? { | ||
| let microphonePublication = audioTracks.first { $0.source == .microphone } | ||
| return microphonePublication?.track as? AudioTrack | ||
| } | ||
|  | ||
| /// The first camera video track of the participant's avatar worker, if one exists. | ||
| var avatarVideoTrack: (any VideoTrack)? { | ||
| return avatarWorker?.firstCameraVideoTrack | ||
| } | ||
| } | ||
|  | ||
| extension AgentState: CustomStringConvertible { | ||
| /// The state's raw value in capitalized form, used when the state is printed or interpolated. | ||
| public var description: String { | ||
| return rawValue.capitalized | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|  | ||
| import Foundation | ||
|  | ||
| /// A message received from the agent. | ||
| /// | ||
| /// Two values with the same ``id`` but different ``content`` or ``timestamp`` compare as | ||
| /// unequal, because `Equatable` is synthesized over all stored properties. | ||
| public struct ReceivedMessage: Identifiable, Equatable, Codable, Sendable { | ||
| /// Identifier of the message, used for `Identifiable`. | ||
| public let id: String | ||
| /// Time associated with the message, as supplied by whoever constructs it. | ||
| public let timestamp: Date | ||
| /// The payload of the message. | ||
| public let content: Content | ||
|  | ||
| /// The kinds of payload a received message can carry; each case wraps the message text. | ||
| public enum Content: Equatable, Codable, Sendable { | ||
| case agentTranscript(String) | ||
| case userTranscript(String) | ||
| case userInput(String) | ||
| } | ||
| } | ||
|  | ||
| /// A message sent to the agent. | ||
| public struct SentMessage: Identifiable, Equatable, Codable, Sendable { | ||
| /// Identifier of the message, used for `Identifiable`. | ||
| public let id: String | ||
| /// Time associated with the message, as supplied by whoever constructs it. | ||
| public let timestamp: Date | ||
| /// The payload of the message. | ||
| public let content: Content | ||
|  | ||
| /// The kinds of payload a sent message can carry; currently only user-entered text. | ||
| public enum Content: Equatable, Codable, Sendable { | ||
| case userInput(String) | ||
| } | ||
| } | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|  | ||
| import Foundation | ||
|  | ||
| /// A protocol that defines a message receiver. | ||
| /// | ||
| /// A message receiver is responsible for creating a stream of messages from the agent. | ||
| /// It is used to receive messages from the agent and update the message feed. | ||
| public protocol MessageReceiver: Sendable { | ||
| /// Creates the stream of ``ReceivedMessage`` values produced by this receiver. | ||
| /// | ||
| /// - Returns: An `AsyncStream` that yields messages as they arrive. | ||
| /// - Throws: Whatever error the conforming type reports while setting up the stream. | ||
| func messages() async throws -> AsyncStream<ReceivedMessage> | ||
| } | 
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,69 @@ | ||||
| /* | ||||
| * Copyright 2025 LiveKit | ||||
| * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | ||||
| * You may obtain a copy of the License at | ||||
| * | ||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||
| * | ||||
| * Unless required by applicable law or agreed to in writing, software | ||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | ||||
| */ | ||||
|  | ||||
| import Foundation | ||||
|  | ||||
| /// An actor that receives transcription messages from the room and yields them as messages. | ||||
| /// | ||||
| /// Room delegate methods are called multiple times for each message, with a stable message ID | ||||
| /// that can be directly used for diffing. | ||||
| /// | ||||
| /// Example: | ||||
| /// ``` | ||||
| /// { id: "1", content: "Hello" } | ||||
| /// { id: "1", content: "Hello world!" } | ||||
| /// ``` | ||||
| @available(*, deprecated, message: "Use TranscriptionStreamReceiver compatible with livekit-agents 1.0") | ||||
| actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate { | ||||
| private let room: Room | ||||
| // Continuation of the stream most recently returned by `messages()`; `nil` until it is first called. | ||||
| private var continuation: AsyncStream<ReceivedMessage>.Continuation? | ||||
|  | ||||
| /// Stores `room` and registers this receiver as one of its delegates. | ||||
| init(room: Room) { | ||||
| self.room = room | ||||
| room.add(delegate: self) | ||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think  
 | ||||
| } | ||||
|  | ||||
| // Finishes the current stream (if one was created) and unregisters from the room. | ||||
| deinit { | ||||
| continuation?.finish() | ||||
| room.remove(delegate: self) | ||||
| } | ||||
|  | ||||
| /// Creates a new message stream for the transcription delegate receiver. | ||||
| /// | ||||
| /// Each call overwrites the stored continuation, so only the most recently returned | ||||
| /// stream is yielded to afterwards. | ||||
| /// NOTE(review): the previous continuation is not finished before being replaced - confirm | ||||
| /// whether earlier streams are expected to terminate or whether a single call is assumed. | ||||
| func messages() -> AsyncStream<ReceivedMessage> { | ||||
| let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) | ||||
| self.continuation = continuation | ||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 And I wonder if we should have an explicit stop function like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a general problem with  
 It can be cancelled from the outside like this: let locations = AsyncLocationStream()
let task = Task {
    for await location in locations.stream {
        print(location)
    }
}
task.cancel()There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re: stream idempotence, I think we've got 2 choices: 
 | ||||
| return stream | ||||
| } | ||||
|  | ||||
| /// Room delegate callback: turns every segment with non-empty text into a ``ReceivedMessage`` | ||||
| /// (agent or user transcript, depending on `participant.isAgent`) and forwards it to the stream. | ||||
| /// | ||||
| /// The method is `nonisolated`, so each message is handed to the actor through its own `Task`. | ||||
| /// NOTE(review): nothing here enforces the relative order in which those tasks run - confirm | ||||
| /// that consumers only rely on the segment ID for diffing. | ||||
| nonisolated func room(_: Room, participant: Participant, trackPublication _: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment]) { | ||||
| segments | ||||
| .filter { !$0.text.isEmpty } | ||||
| .forEach { segment in | ||||
| let message = ReceivedMessage( | ||||
| id: segment.id, | ||||
| timestamp: segment.lastReceivedTime, | ||||
| content: participant.isAgent ? .agentTranscript(segment.text) : .userTranscript(segment.text) | ||||
| ) | ||||
| Task { | ||||
| await yield(message) | ||||
| } | ||||
| } | ||||
| } | ||||
|  | ||||
| /// Forwards `message` to the current continuation; does nothing if `messages()` has not been called yet. | ||||
| private func yield(_ message: ReceivedMessage) { | ||||
| continuation?.yield(message) | ||||
| } | ||||
| } | ||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does `buffering` mean here? Does it refer to pre-buffering?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's equivalent to JS `isBufferingSpeech`, so it refers to the pre-connect buffer.
It's not exposed anywhere, but we can rename it.