-
Notifications
You must be signed in to change notification settings - Fork 154
Session API #789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Session API #789
Changes from 35 commits
bc411aa
43861e8
314b1c1
93f3081
319798f
d4a496e
dfa4db6
54acf68
bec88b6
33b02f9
d141d6a
0e49e6f
7b954da
06609c1
ac90eb4
d84ef9b
4fcd651
d5e6437
9cf68ff
ae38145
2327745
ddf20a5
89e25f5
a93fcf0
961b7c4
15509a9
00d74cd
24e6f18
8ebbc9c
095ffe7
a2654a0
44e2af9
9220587
624d580
6cb1a38
565db30
160f8fb
a787606
7a71453
585036b
385d2bb
6846414
99e74b8
d8cf47d
b32b5db
ce657d4
ead1a44
b20d1dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| minor type="added" "Agent and Session APIs for creating agent-based apps" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,161 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| import Foundation | ||
|
|
||
| /// Represents a LiveKit Agent. | ||
| /// | ||
| /// The ``Agent`` struct represents the state of a LiveKit agent within a ``Session``. | ||
| /// It provides information about the agent's connection status, its current state | ||
| /// (e.g., listening, thinking, speaking), and its media tracks. | ||
| /// | ||
| /// The ``Agent``'s properties are updated automatically by the ``Session`` as the agent's | ||
| /// state changes. This allows the application to react to the agent's | ||
| /// behavior, such as displaying its avatar video or indicating when it is speaking. | ||
| /// The ``agentState`` property is particularly useful for building UIs that reflect | ||
| /// the agent's current activity. | ||
| /// | ||
| /// - SeeAlso: [LiveKit SwiftUI Agent Starter](https://github.com/livekit-examples/agent-starter-swift). | ||
| /// - SeeAlso: [LiveKit Agents documentation](https://docs.livekit.io/agents/). | ||
| public struct Agent: Loggable { | ||
| // MARK: - Error | ||
|
|
||
| public enum Error: LocalizedError { | ||
| case timeout | ||
|
|
||
| public var errorDescription: String? { | ||
| switch self { | ||
| case .timeout: | ||
| "Agent did not connect" | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // MARK: - State | ||
|
|
||
| private enum State { | ||
| case disconnected | ||
| case connecting | ||
| case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?) | ||
| case failed(Error) | ||
| } | ||
|
|
||
| private var state: State = .disconnected | ||
|
|
||
| // MARK: - Transitions | ||
|
|
||
| mutating func disconnected() { | ||
| log("Agent disconnected from \(state)") | ||
| // From any state | ||
| state = .disconnected | ||
| } | ||
|
|
||
| mutating func failed(_ error: Error) { | ||
| log("Agent failed with error \(error) from \(state)") | ||
| // From any state | ||
| state = .failed(error) | ||
| } | ||
|
|
||
| mutating func connecting() { | ||
| log("Agent connecting from \(state)") | ||
| switch state { | ||
| case .disconnected, .connecting, .connected: // pre-connect is listening (connected) | ||
| state = .connecting | ||
| default: | ||
| log("Invalid transition from \(state) to connecting", .warning) | ||
| } | ||
| } | ||
|
|
||
| mutating func listening() { | ||
| log("Agent listening from \(state)") | ||
| switch state { | ||
| case .disconnected: | ||
| state = .connected(agentState: .listening, audioTrack: nil, avatarVideoTrack: nil) | ||
| default: | ||
| log("Invalid transition from \(state) to listening", .warning) | ||
| } | ||
| } | ||
|
|
||
| mutating func connected(participant: Participant) { | ||
| log("Agent connected to \(participant) from \(state)") | ||
| switch state { | ||
| case .connecting, .connected: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the perspective of UI framework, it does not really matter (for perf) https://medium.com/airbnb-engineering/understanding-and-improving-swiftui-performance-37b77ac61896 as we use the non-equatable (default) comparison for the |
||
| state = .connected(agentState: participant.agentState, | ||
| audioTrack: participant.agentAudioTrack, | ||
| avatarVideoTrack: participant.avatarVideoTrack) | ||
| default: | ||
| log("Invalid transition from \(state) to connected", .warning) | ||
| } | ||
| } | ||
|
|
||
| // MARK: - Public | ||
|
|
||
| /// A boolean value indicating whether the agent is connected. | ||
| public var isConnected: Bool { | ||
| switch state { | ||
| case .connected: true | ||
| default: false | ||
| } | ||
| } | ||
|
|
||
| /// The current conversational state of the agent, or `nil` when the agent is not connected. | ||
| public var agentState: AgentState? { | ||
| switch state { | ||
| case let .connected(agentState, _, _): agentState | ||
| default: nil | ||
| } | ||
| } | ||
|
|
||
| /// The agent's audio track, or `nil` when the agent is not connected or has no audio track. | ||
| public var audioTrack: (any AudioTrack)? { | ||
| switch state { | ||
| case let .connected(_, audioTrack, _): audioTrack | ||
| default: nil | ||
| } | ||
| } | ||
|
|
||
| /// The agent's avatar video track, or `nil` when the agent is not connected or has no avatar video track. | ||
| public var avatarVideoTrack: (any VideoTrack)? { | ||
| switch state { | ||
| case let .connected(_, _, avatarVideoTrack): avatarVideoTrack | ||
| default: nil | ||
| } | ||
| } | ||
|
|
||
| /// The error that put the agent into the failed state, or `nil` when the agent is not currently in a failed state. | ||
| public var error: Error? { | ||
| switch state { | ||
| case let .failed(error): error | ||
| default: nil | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private extension Participant { | ||
| var agentAudioTrack: (any AudioTrack)? { | ||
| audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack | ||
| } | ||
|
|
||
| var avatarVideoTrack: (any VideoTrack)? { | ||
| avatarWorker?.firstCameraVideoTrack | ||
| } | ||
| } | ||
|
|
||
| extension AgentState: CustomStringConvertible { | ||
| public var description: String { | ||
| rawValue.capitalized | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| import Foundation | ||
|
|
||
| /// A received message: an agent transcript, a user transcript, or user input (see ``Content``). | ||
| public struct ReceivedMessage: Identifiable, Equatable, Codable, Sendable { | ||
| public let id: String | ||
| public let timestamp: Date | ||
| public let content: Content | ||
|
|
||
| public enum Content: Equatable, Codable, Sendable { | ||
| case agentTranscript(String) | ||
| case userTranscript(String) | ||
| case userInput(String) | ||
| } | ||
| } | ||
|
|
||
| /// A message sent to the agent. | ||
| public struct SentMessage: Identifiable, Equatable, Codable, Sendable { | ||
| public let id: String | ||
| public let timestamp: Date | ||
| public let content: Content | ||
|
|
||
| public enum Content: Equatable, Codable, Sendable { | ||
| case userInput(String) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| /* | ||
| * Copyright 2025 LiveKit | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| import Foundation | ||
|
|
||
| /// A protocol that defines a message receiver. | ||
| /// | ||
| /// A message receiver is responsible for creating a stream of messages from the agent. | ||
| /// It is used to receive messages from the agent and update the message feed. | ||
| public protocol MessageReceiver: Sendable { | ||
| func messages() async throws -> AsyncStream<ReceivedMessage> | ||
| } |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,68 @@ | ||||
| /* | ||||
| * Copyright 2025 LiveKit | ||||
| * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | ||||
| * You may obtain a copy of the License at | ||||
| * | ||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||
| * | ||||
| * Unless required by applicable law or agreed to in writing, software | ||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | ||||
| */ | ||||
|
|
||||
| import Foundation | ||||
|
|
||||
| /// An actor that receives transcription messages from the room and yields them as messages. | ||||
| /// | ||||
| /// Room delegate methods are called multiple times for each message, with a stable message ID | ||||
| /// that can be directly used for diffing. | ||||
| /// | ||||
| /// Example: | ||||
| /// ``` | ||||
| /// { id: "1", content: "Hello" } | ||||
| /// { id: "1", content: "Hello world!" } | ||||
| /// ``` | ||||
| @available(*, deprecated, message: "Use TranscriptionStreamReceiver compatible with livekit-agents 1.0") | ||||
| actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate { | ||||
| private let room: Room | ||||
| private var continuation: AsyncStream<ReceivedMessage>.Continuation? | ||||
|
|
||||
| init(room: Room) { | ||||
| self.room = room | ||||
| room.add(delegate: self) | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think
|
||||
| } | ||||
|
|
||||
| deinit { | ||||
| room.remove(delegate: self) | ||||
| } | ||||
|
|
||||
| /// Creates a new message stream for the transcription delegate receiver. | ||||
| func messages() -> AsyncStream<ReceivedMessage> { | ||||
| let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) | ||||
| self.continuation = continuation | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
And I wonder if we should have an explicit stop function like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a general problem with
It can be cancelled from the outside like this: let locations = AsyncLocationStream()
let task = Task {
for await location in locations.stream {
print(location)
}
}
task.cancel()There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re: stream idempotence, I think we've got 2 choices:
|
||||
| return stream | ||||
| } | ||||
|
|
||||
| nonisolated func room(_: Room, participant: Participant, trackPublication _: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment]) { | ||||
| segments | ||||
| .filter { !$0.text.isEmpty } | ||||
| .forEach { segment in | ||||
| let message = ReceivedMessage( | ||||
| id: segment.id, | ||||
| timestamp: segment.lastReceivedTime, | ||||
| content: participant.isAgent ? .agentTranscript(segment.text) : .userTranscript(segment.text) | ||||
| ) | ||||
| Task { | ||||
| await yield(message) | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| private func yield(_ message: ReceivedMessage) { | ||||
| continuation?.yield(message) | ||||
| } | ||||
| } | ||||
Uh oh!
There was an error while loading. Please reload this page.