Skip to content

Commit 845aee2

Browse files
authored
Session API (#789)
Adds 3 basic building blocks for simple(r) agent experiences: - `Session` - connection, pre-connect, agent dispatch, agent filtering (e.g. by name), all agents, messages (broadcasted and aggregated for now) - `Agent` - wrapper around `Participant`, knows its tracks and internal state - `LocalMedia` - (unrelated) helper to deal with local tracks in SwiftUI Example: livekit-examples/agent-starter-swift#29
1 parent 0bb87eb commit 845aee2

File tree

16 files changed

+1373
-8
lines changed

16 files changed

+1373
-8
lines changed

.changes/agent-session

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
minor type="added" "Agent and Session APIs for creating agent-based apps"

.github/workflows/ci.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ jobs:
6161
# https://github.com/actions/runner-images/blob/main/images/macos/macos-26-arm64-Readme.md
6262
- os: macos-26
6363
xcode: latest
64-
platform: "iOS Simulator,name=iPhone 17 Pro,OS=26.0"
64+
platform: "iOS Simulator,name=iPhone 17 Pro,OS=26.1"
6565
symbol-graph: true
6666
- os: macos-26
6767
xcode: latest
68-
platform: "iOS Simulator,name=iPhone 17 Pro,OS=26.0"
68+
platform: "iOS Simulator,name=iPhone 17 Pro,OS=26.1"
6969
extension-api-only: true
7070
- os: macos-26
7171
xcode: latest
@@ -84,10 +84,10 @@ jobs:
8484
platform: "macOS,variant=Mac Catalyst"
8585
- os: macos-26
8686
xcode: latest
87-
platform: "visionOS Simulator,name=Apple Vision Pro,OS=26.0"
87+
platform: "visionOS Simulator,name=Apple Vision Pro,OS=26.1"
8888
- os: macos-26
8989
xcode: latest
90-
platform: "tvOS Simulator,name=Apple TV,OS=26.0"
90+
platform: "tvOS Simulator,name=Apple TV,OS=26.1"
9191

9292
runs-on: ${{ matrix.os }}
9393
timeout-minutes: 60

Sources/LiveKit/Agent/Agent.swift

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Foundation
18+
19+
/// Represents a LiveKit Agent.
20+
///
21+
/// The ``Agent`` struct represents the state of a LiveKit agent within a ``Session``.
22+
/// It provides information about the agent's connection status, its current state
23+
/// (e.g., listening, thinking, speaking), and its media tracks.
24+
///
25+
/// The ``Agent``'s properties are updated automatically by the ``Session`` as the agent's
26+
/// state changes. This allows the application to react to the agent's
27+
/// behavior, such as displaying its avatar video or indicating when it is speaking.
28+
/// The ``agentState`` property is particularly useful for building UIs that reflect
29+
/// the agent's current activity.
30+
///
31+
/// - SeeAlso: [LiveKit SwiftUI Agent Starter](https://github.com/livekit-examples/agent-starter-swift).
32+
/// - SeeAlso: [LiveKit Agents documentation](https://docs.livekit.io/agents/).
33+
public struct Agent: Loggable {
    // MARK: - Error

    /// Failures that can be recorded on an ``Agent``.
    public enum Error: LocalizedError {
        /// The agent did not connect.
        case timeout

        /// User-facing description of the failure.
        public var errorDescription: String? {
            switch self {
            case .timeout:
                return "Agent did not connect"
            }
        }
    }

    // MARK: - State

    /// Internal lifecycle of the agent; every public property is derived from it.
    private enum State {
        case disconnected
        case connecting(buffering: Bool)
        case connected(agentState: AgentState, audioTrack: (any AudioTrack)?, avatarVideoTrack: (any VideoTrack)?)
        case failed(error: Error)
    }

    private var state: State = .disconnected

    // MARK: - Transitions

    /// Moves the agent to the disconnected state. Allowed from any state.
    mutating func disconnected() {
        log("Agent disconnected from \(state)")
        state = .disconnected
    }

    /// Records `error` and moves the agent to the failed state. Allowed from any state.
    mutating func failed(error: Error) {
        log("Agent failed with error \(error) from \(state)")
        state = .failed(error: error)
    }

    /// Moves the agent to the connecting state, storing the `buffering` flag.
    ///
    /// Only applied when the agent is disconnected or already connecting;
    /// from any other state a warning is logged and the state is left untouched.
    mutating func connecting(buffering: Bool) {
        log("Agent connecting from \(state)")
        switch state {
        case .connected, .failed:
            log("Invalid transition from \(state) to connecting", .warning)
        case .disconnected, .connecting:
            state = .connecting(buffering: buffering)
        }
    }

    /// Moves the agent to the connected state, snapshotting the participant's
    /// agent state, audio track and avatar video track.
    ///
    /// Only applied when the agent is connecting or already connected;
    /// from any other state a warning is logged and the state is left untouched.
    mutating func connected(participant: Participant) {
        log("Agent connected to \(participant) from \(state)")
        switch state {
        case .disconnected, .failed:
            log("Invalid transition from \(state) to connected", .warning)
        case .connecting, .connected:
            state = .connected(
                agentState: participant.agentState,
                audioTrack: participant.agentAudioTrack,
                avatarVideoTrack: participant.avatarVideoTrack
            )
        }
    }

    // MARK: - Public

    /// A boolean value indicating whether the agent is connected.
    public var isConnected: Bool {
        if case .connected = state {
            return true
        }
        return false
    }

    /// The current conversational state of the agent, or `nil` when not connected.
    public var agentState: AgentState? {
        guard case let .connected(current, _, _) = state else { return nil }
        return current
    }

    /// The agent's audio track, or `nil` when not connected.
    public var audioTrack: (any AudioTrack)? {
        guard case let .connected(_, track, _) = state else { return nil }
        return track
    }

    /// The agent's avatar video track, or `nil` when not connected.
    public var avatarVideoTrack: (any VideoTrack)? {
        guard case let .connected(_, _, track) = state else { return nil }
        return track
    }

    /// The error held by the failed state, or `nil` when the agent is in any other state.
    public var error: Error? {
        guard case let .failed(failure) = state else { return nil }
        return failure
    }
}
136+
137+
private extension Participant {
138+
var agentAudioTrack: (any AudioTrack)? {
139+
audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack
140+
}
141+
142+
var avatarVideoTrack: (any VideoTrack)? {
143+
avatarWorker?.firstCameraVideoTrack
144+
}
145+
}
146+
147+
extension AgentState: CustomStringConvertible {
148+
public var description: String {
149+
rawValue.capitalized
150+
}
151+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Foundation
18+
19+
/// A message received from the agent.
20+
public struct ReceivedMessage: Identifiable, Equatable, Codable, Sendable {
    /// Identifier of the message; also serves as its `Identifiable` identity.
    public let id: String
    /// Timestamp associated with the message.
    public let timestamp: Date
    /// Payload of the message.
    public let content: Content

    /// The kinds of payload a received message can carry.
    public enum Content: Equatable, Codable, Sendable {
        /// Transcript text attributed to the agent.
        case agentTranscript(String)
        /// Transcript text attributed to the user.
        case userTranscript(String)
        /// Text carried as user input.
        case userInput(String)
    }

    /// Creates a received message.
    ///
    /// Declared explicitly because the implicit memberwise initializer of a public
    /// struct is only `internal`, which would make it impossible to construct a
    /// message from outside the module. Labels and order match the memberwise
    /// initializer, so existing call sites are unaffected.
    ///
    /// - Parameters:
    ///   - id: Identifier of the message.
    ///   - timestamp: Timestamp associated with the message.
    ///   - content: Payload of the message.
    public init(id: String, timestamp: Date, content: Content) {
        self.id = id
        self.timestamp = timestamp
        self.content = content
    }
}
31+
32+
/// A message sent to the agent.
33+
public struct SentMessage: Identifiable, Equatable, Codable, Sendable {
    /// The kinds of payload a sent message can carry.
    public enum Content: Equatable, Codable, Sendable {
        /// Text carried as user input.
        case userInput(String)
    }

    /// Identifier of the message; also serves as its `Identifiable` identity.
    public let id: String

    /// Timestamp associated with the message.
    public let timestamp: Date

    /// Payload of the message.
    public let content: Content
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Foundation
18+
19+
/// A protocol that defines a message receiver.
20+
///
21+
/// A message receiver is responsible for creating a stream of messages from the agent.
22+
/// It is used to receive messages from the agent and update the message feed.
23+
public protocol MessageReceiver: Sendable {
    /// Produces the stream on which this receiver delivers ``ReceivedMessage`` values.
    ///
    /// - Returns: An `AsyncStream` of received messages.
    /// - Throws: An error when the stream cannot be provided; the exact conditions
    ///   are up to the conforming type.
    func messages() async throws -> AsyncStream<ReceivedMessage>
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright 2025 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Foundation
18+
19+
/// An actor that receives transcription messages from the room and yields them as messages.
20+
///
21+
/// Room delegate methods are called multiple times for each message, with a stable message ID
22+
/// that can be directly used for diffing.
23+
///
24+
/// Example:
25+
/// ```
26+
/// { id: "1", content: "Hello" }
27+
/// { id: "1", content: "Hello world!" }
28+
/// ```
29+
@available(*, deprecated, message: "Use TranscriptionStreamReceiver compatible with livekit-agents 1.0")
actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate {
    /// Room whose delegate callbacks feed the message stream.
    private let room: Room
    /// Continuation of the most recently created stream; only one is kept at a time.
    private var continuation: AsyncStream<ReceivedMessage>.Continuation?

    /// Creates the receiver and registers it as a delegate of `room`.
    init(room: Room) {
        self.room = room
        room.add(delegate: self)
    }

    /// Finishes the active stream (if any) and unregisters from the room.
    deinit {
        continuation?.finish()
        room.remove(delegate: self)
    }

    /// Creates a new message stream for the transcription delegate receiver.
    ///
    /// Only the most recently created stream receives messages. Any stream
    /// returned by an earlier call is finished here, so its consumer's
    /// `for await` loop ends instead of waiting forever on a continuation
    /// that is no longer fed.
    ///
    /// - Returns: A stream of transcription messages.
    func messages() -> AsyncStream<ReceivedMessage> {
        continuation?.finish()
        let (stream, freshContinuation) = AsyncStream.makeStream(of: ReceivedMessage.self)
        continuation = freshContinuation
        return stream
    }

    /// Converts the non-empty transcription segments into messages and forwards them to the stream.
    ///
    /// Segments sent by an agent participant become `.agentTranscript`, all others `.userTranscript`.
    nonisolated func room(_: Room, participant: Participant, trackPublication _: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment]) {
        let messages = segments
            .filter { !$0.text.isEmpty }
            .map { segment in
                ReceivedMessage(
                    id: segment.id,
                    timestamp: segment.lastReceivedTime,
                    content: participant.isAgent ? .agentTranscript(segment.text) : .userTranscript(segment.text)
                )
            }
        guard !messages.isEmpty else { return }

        // A single task for the whole batch keeps the segments of one callback in order;
        // separate unstructured tasks carry no ordering guarantee relative to each other.
        Task {
            await yield(messages)
        }
    }

    /// Yields `messages`, in order, to the active stream; does nothing when no stream exists.
    private func yield(_ messages: [ReceivedMessage]) {
        for message in messages {
            continuation?.yield(message)
        }
    }
}

0 commit comments

Comments
 (0)