Skip to content

Commit dede7eb

Browse files
authored
Add OpenAI realtime support (#108)
1 parent 061514a commit dede7eb

30 files changed

+1592
-43
lines changed

README.md

+143
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,149 @@ This example it taken from OpenAI's [function calling guide](https://platform.op
876876
```
877877

878878

879+
### How to use realtime audio with OpenAI
880+
881+
Use this example to have a conversation with OpenAI's realtime models.
882+
883+
We recommend getting a basic chat completion with OpenAI working before attempting realtime.
884+
Realtime is a more involved integration (as you can see from the code snippet below), and
885+
getting a basic integration working first narrows down the source of any problem.
886+
887+
Take these steps to build and run an OpenAI realtime example:
888+
889+
1. Generate a new SwiftUI Xcode project called `MyApp`
890+
2. Add the `NSMicrophoneUsageDescription` key to your `Info.plist` file
891+
3. If macOS, tap your project > your target > Signing & Capabilities and add the following:
892+
- App Sandbox > Outgoing Connections (client)
893+
- App Sandbox > Audio Input
894+
- Hardened Runtime > Audio Input
895+
4. Replace the contents of `MyApp.swift` with the snippet below
896+
5. Uncomment one of the two `openAIService` initializers in the snippet and replace its placeholders
897+
- If connecting directly to OpenAI, replace `your-openai-key`
898+
- If protecting your connection through AIProxy, replace `partial-key-from-your-developer-dashboard` and `service-url-from-your-developer-dashboard`
899+
6. Set the `logLevel` argument of the `openAIService.realtimeSession` call to your desired level. If you leave
900+
it set at `.debug`, then you'll see logs for all audio samples that we send to and receive from OpenAI.
901+
902+
**Important** If you would like to protect your connection through AIProxy's backend, your
903+
AIProxy project must be enabled for websocket use. Please reach out if you would like to be
904+
added to the private beta.
905+
906+
```swift
907+
import SwiftUI
908+
import AIProxy
909+
910+
@main
911+
struct MyApp: App {
912+
913+
let realtimeManager = RealtimeManager()
914+
915+
var body: some Scene {
916+
WindowGroup {
917+
Button("Start conversation") {
918+
Task {
919+
try await realtimeManager.startConversation()
920+
}
921+
}
922+
}
923+
}
924+
}
925+
926+
@RealtimeActor
927+
final class RealtimeManager {
928+
private var realtimeSession: OpenAIRealtimeSession?
929+
private var microphonePCMSampleVendor: MicrophonePCMSampleVendor?
930+
private var audioPCMPlayer: AudioPCMPlayer?
931+
932+
nonisolated init() {}
933+
934+
func startConversation() async throws {
935+
/* Uncomment for BYOK use cases */
936+
// let openAIService = AIProxy.openAIDirectService(
937+
// unprotectedAPIKey: "your-openai-key"
938+
// )
939+
940+
/* Uncomment to protect your connection through AIProxy */
941+
// let openAIService = AIProxy.openAIService(
942+
// partialKey: "partial-key-from-your-developer-dashboard",
943+
// serviceURL: "service-url-from-your-developer-dashboard"
944+
// )
945+
946+
// Set to false if you want your user to speak first
947+
let aiSpeaksFirst = true
948+
949+
// Initialize an audio player to play PCM16 data that we receive from OpenAI:
950+
let audioPCMPlayer = try AudioPCMPlayer()
951+
952+
// Initialize a microphone vendor to vend PCM16 audio samples that we'll send to OpenAI:
953+
let microphonePCMSampleVendor = MicrophonePCMSampleVendor()
954+
let audioStream = try microphonePCMSampleVendor.start()
955+
956+
// Start the realtime session:
957+
let configuration = OpenAIRealtimeSessionConfiguration(
958+
inputAudioFormat: .pcm16,
959+
inputAudioTranscription: .init(model: "whisper-1"),
960+
instructions: "You are a tour guide of Yosemite national park",
961+
maxResponseOutputTokens: .int(4096),
962+
modalities: [.audio, .text],
963+
outputAudioFormat: .pcm16,
964+
temperature: 0.7,
965+
turnDetection: .init(
966+
prefixPaddingMs: 200,
967+
silenceDurationMs: 500,
968+
threshold: 0.5
969+
),
970+
voice: "shimmer"
971+
)
972+
973+
let realtimeSession = try await openAIService.realtimeSession(
974+
model: "gpt-4o-mini-realtime-preview-2024-12-17",
975+
configuration: configuration,
976+
logLevel: .debug
977+
)
978+
979+
// Send audio from the microphone to OpenAI once OpenAI is ready for it:
980+
var isOpenAIReadyForAudio = false
981+
Task {
982+
for await buffer in audioStream {
983+
if isOpenAIReadyForAudio, let base64Audio = AIProxy.base64EncodeAudioPCMBuffer(from: buffer) {
984+
await realtimeSession.sendMessage(
985+
OpenAIRealtimeInputAudioBufferAppend(audio: base64Audio)
986+
)
987+
}
988+
}
989+
}
990+
991+
// Listen for messages from OpenAI:
992+
Task {
993+
for await message in realtimeSession.receiver {
994+
switch message {
995+
case .error(_):
996+
realtimeSession.disconnect()
997+
case .sessionUpdated:
998+
if aiSpeaksFirst {
999+
await realtimeSession.sendMessage(OpenAIRealtimeResponseCreate())
1000+
} else {
1001+
isOpenAIReadyForAudio = true
1002+
}
1003+
case .responseAudioDelta(let base64Audio):
1004+
audioPCMPlayer.playPCM16Audio(from: base64Audio)
1005+
case .inputAudioBufferSpeechStarted:
1006+
audioPCMPlayer.interruptPlayback()
1007+
case .responseCreated:
1008+
isOpenAIReadyForAudio = true
1009+
default:
1010+
break
1011+
}
1012+
}
1013+
}
1014+
1015+
self.microphonePCMSampleVendor = microphonePCMSampleVendor
1016+
self.audioPCMPlayer = audioPCMPlayer
1017+
self.realtimeSession = realtimeSession
1018+
}
1019+
}
1020+
```
1021+
8791022
### How to use OpenAI through an Azure deployment
8801023

8811024
You can use all of the OpenAI snippets above with one change. Initialize the OpenAI service with:

Sources/AIProxy/AIProxy.swift

+20-8
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
1-
import OSLog
1+
import AVFoundation
22
#if canImport(AppKit) && !targetEnvironment(macCatalyst)
33
import AppKit
44
#elseif canImport(UIKit)
55
import UIKit
66
#endif
77

8-
let aiproxyLogger = Logger(
9-
subsystem: Bundle.main.bundleIdentifier ?? "UnknownApp",
10-
category: "AIProxy"
11-
)
12-
138
public struct AIProxy {
149

1510
/// The current sdk version
16-
public static let sdkVersion = "0.71.0"
11+
public static let sdkVersion = "0.72.0"
1712

1813
/// - Parameters:
1914
/// - partialKey: Your partial key is displayed in the AIProxy dashboard when you submit your provider's key.
@@ -890,11 +885,28 @@ public struct AIProxy {
890885
do {
891886
return try await AnonymousAccountStorage.sync()
892887
} catch {
893-
aiproxyLogger.critical("Could not configure an AIProxy anonymous account: \(error.localizedDescription)")
888+
if ll(.critical) { aiproxyLogger.critical("Could not configure an AIProxy anonymous account: \(error.localizedDescription)") }
894889
}
895890
return nil
896891
}
897892

893+
/// Base64-encodes the raw bytes held by a single-channel audio buffer.
///
/// - Parameter buffer: The audio buffer to encode. Its format must report exactly one channel.
/// - Returns: The base64 string of the `mDataByteSize` bytes found at the buffer's `mData`
///   pointer, or `nil` when the buffer has more or fewer than one channel or has no
///   backing data pointer. Both failure cases are logged at the `.error` level.
public static func base64EncodeAudioPCMBuffer(from buffer: AVAudioPCMBuffer) -> String? {
    guard buffer.format.channelCount == 1 else {
        if ll(.error) { aiproxyLogger.error("This encoding routine assumes a single channel") }
        return nil
    }

    // With one channel, the first (and only) AudioBuffer in the list holds all of the samples.
    let monoBuffer = buffer.audioBufferList.pointee.mBuffers
    guard let rawBytes = monoBuffer.mData else {
        if ll(.error) { aiproxyLogger.error("No audio buffer list available to encode") }
        return nil
    }

    let byteCount = Int(monoBuffer.mDataByteSize)
    return Data(bytes: rawBytes, count: byteCount).base64EncodedString()
}
909+
898910
private init() {
899911
fatalError("This type is not designed to be instantiated")
900912
}

Sources/AIProxy/AIProxyCertificatePinning.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,12 @@ open class AIProxyCertificatePinningDelegate: NSObject, URLSessionDelegate, URLS
7070
_ challenge: URLAuthenticationChallenge
7171
) -> (URLSession.AuthChallengeDisposition, URLCredential?) {
7272
guard let secTrust = challenge.protectionSpace.serverTrust else {
73-
aiproxyLogger.error("Could not access the server's security space")
73+
if ll(.error) { aiproxyLogger.error("Could not access the server's security space") }
7474
return (.cancelAuthenticationChallenge, nil)
7575
}
7676

7777
guard let certificate = getServerCert(secTrust: secTrust) else {
78-
aiproxyLogger.error("Could not access the server's TLS cert")
78+
if ll(.error) { aiproxyLogger.error("Could not access the server's TLS cert") }
7979
return (.cancelAuthenticationChallenge, nil)
8080
}
8181

Sources/AIProxy/AIProxyDeviceCheck.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct AIProxyDeviceCheck {
3434
internal static func getToken() async -> String? {
3535
guard DCDevice.current.isSupported else {
3636
if ProcessInfo.processInfo.environment["AIPROXY_DEVICE_CHECK_BYPASS"] == nil {
37-
aiproxyLogger.warning("\(deviceCheckWarning, privacy: .public)")
37+
if ll(.warning) { aiproxyLogger.warning("\(deviceCheckWarning, privacy: .public)") }
3838
}
3939
return nil
4040
}
@@ -43,7 +43,7 @@ struct AIProxyDeviceCheck {
4343
let data = try await DCDevice.current.generateToken()
4444
return data.base64EncodedString()
4545
} catch {
46-
aiproxyLogger.error("Could not create DeviceCheck token. Are you using an explicit bundle identifier?")
46+
if ll(.error) { aiproxyLogger.error("Could not create DeviceCheck token. Are you using an explicit bundle identifier?") }
4747
return nil
4848
}
4949
}

Sources/AIProxy/AIProxyLogger.swift

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import OSLog
2+
3+
/// Log levels used to decide which of the library's log statements are emitted.
///
/// Cases are declared in ascending order, so the implicit `Int` raw values run
/// from 0 (`debug`) through 4 (`critical`).
public enum AIProxyLogLevel: Int {
    case debug, info, warning, error, critical

    /// Reports whether this level is at or above `threshold`.
    ///
    /// - Parameter threshold: The minimum level to compare against.
    /// - Returns: `true` when this level's raw value is greater than or equal to
    ///   the raw value of `threshold`.
    func isAtOrAboveThresholdLevel(_ threshold: AIProxyLogLevel) -> Bool {
        threshold.rawValue <= rawValue
    }
}
14+
15+
/// The minimum level a log statement must have to be emitted; read by `ll(_:)`.
/// Starts out at `.warning`.
internal var aiproxyCallerDesiredLogLevel: AIProxyLogLevel = .warning

/// The library-wide logger. Its subsystem is the main bundle's identifier
/// (falling back to "UnknownApp" when there is none) and its category is "AIProxy".
internal let aiproxyLogger: Logger = {
    let subsystem = Bundle.main.bundleIdentifier ?? "UnknownApp"
    return Logger(subsystem: subsystem, category: "AIProxy")
}()
20+
21+
// Log callsites are written as `if ll(<level>) { aiproxyLogger... }` instead of going through
// a wrapper around OSLog. Xcode links each log message to its source location, and with a
// wrapper (even an inlined one) that link always points at the wrapper, not the real callsite.

/// Reports whether a log statement of the given level should be emitted.
///
/// - Parameter logLevel: The level of the log statement about to be written.
/// - Returns: `true` when `logLevel` is at or above `aiproxyCallerDesiredLogLevel`.
@inline(__always)
internal func ll(_ logLevel: AIProxyLogLevel) -> Bool {
    return logLevel.rawValue >= aiproxyCallerDesiredLogLevel.rawValue
}

Sources/AIProxy/AnonymousAccount/AIProxyKeychain.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ struct AIProxyKeychain {
132132
if status == noErr {
133133
return queryResult as? Data
134134
}
135-
aiproxyLogger.error("Unexpected keychain error in searchKeychainCopyMatching: \(status)")
135+
if ll(.error) { aiproxyLogger.error("Unexpected keychain error in searchKeychainCopyMatching: \(status)") }
136136
return nil
137137
}
138138

Sources/AIProxy/AnonymousAccount/AnonymousAccountStorage.swift

+16-18
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ final class AnonymousAccountStorage {
6666
// meaning the one that was created earliest. The design of this class is to eventually resolve out
6767
// to the earliest account across multiple devices.
6868
if !AIProxyStorage.ukvsSync() {
69-
aiproxyLogger.error("Could not synchronize NSUbiquitousKeyValueStore. Please ensure you enabled the key/value store in Target > Signing & Capabilities > iCloud > Key-Value storage?")
69+
if ll(.error) { aiproxyLogger.error("Could not synchronize NSUbiquitousKeyValueStore. Please ensure you enabled the key/value store in Target > Signing & Capabilities > iCloud > Key-Value storage?") }
7070
}
7171
if let ukvsAccountData = AIProxyStorage.ukvsAccountData() {
7272
let ukvsAccount = try AnonymousAccount.deserialize(from: ukvsAccountData)
@@ -78,7 +78,7 @@ final class AnonymousAccountStorage {
7878
localAccount = ukvsAccount
7979
self.localAccountChain.append(ukvsAccount)
8080
if try await AIProxyStorage.updateLocalAccountChainInKeychain(self.localAccountChain) != noErr {
81-
aiproxyLogger.warning("Could not update the local account chain")
81+
if ll(.warning) { aiproxyLogger.warning("Could not update the local account chain") }
8282
}
8383
} else {
8484
try AIProxyStorage.updateUKVS(localAccount)
@@ -108,17 +108,17 @@ final class AnonymousAccountStorage {
108108
localAccount = remoteAccount
109109
self.localAccountChain.append(remoteAccount)
110110
if try await AIProxyStorage.updateLocalAccountChainInKeychain(self.localAccountChain) != noErr {
111-
aiproxyLogger.warning("Could not update the local account chain")
111+
if ll(.warning) { aiproxyLogger.warning("Could not update the local account chain") }
112112
}
113113
try AIProxyStorage.updateUKVS(localAccount)
114114
} else {
115115
if try await AIProxyStorage.updateRemoteAccountInKeychain(localAccount) != noErr {
116-
aiproxyLogger.warning("Could not update the remote account")
116+
if ll(.warning) { aiproxyLogger.warning("Could not update the remote account") }
117117
}
118118
}
119119
}
120120
} else {
121-
aiproxyLogger.warning("Keychain cloud sync claims that there is a duplicate item, but we can't fetch it.")
121+
if ll(.warning) { aiproxyLogger.warning("Keychain cloud sync claims that there is a duplicate item, but we can't fetch it.") }
122122
}
123123
}
124124

@@ -129,10 +129,8 @@ final class AnonymousAccountStorage {
129129
name: NSUbiquitousKeyValueStore.didChangeExternallyNotification,
130130
object: NSUbiquitousKeyValueStore.default)
131131

132-
#if false
133-
aiproxyLogger.info("Local account chain is \(localAccountChain)")
134-
aiproxyLogger.info("Anonymous account identifier is \(self.resolvedAccount!.uuid)")
135-
#endif
132+
if ll(.debug) { aiproxyLogger.debug("Local account chain is \(localAccountChain)") }
133+
if ll(.debug) { aiproxyLogger.debug("Anonymous account identifier is \(self.resolvedAccount?.uuid ?? "unknown")") }
136134

137135
return localAccount.uuid
138136
}
@@ -156,10 +154,10 @@ final class AnonymousAccountStorage {
156154
}
157155

158156
switch changeReason.intValue {
159-
case NSUbiquitousKeyValueStoreServerChange: aiproxyLogger.info("AIProxy account changed due to remote server change")
160-
case NSUbiquitousKeyValueStoreInitialSyncChange: aiproxyLogger.info("AIProxy account changed due to initial sync change")
161-
case NSUbiquitousKeyValueStoreQuotaViolationChange: aiproxyLogger.info("AIProxy account changed due to quota violation")
162-
case NSUbiquitousKeyValueStoreAccountChange: aiproxyLogger.info("AIProxy account changed due to icloud account change")
157+
case NSUbiquitousKeyValueStoreServerChange: if ll(.info) { aiproxyLogger.info("AIProxy account changed due to remote server change") }
158+
case NSUbiquitousKeyValueStoreInitialSyncChange: if ll(.info) { aiproxyLogger.info("AIProxy account changed due to initial sync change") }
159+
case NSUbiquitousKeyValueStoreQuotaViolationChange: if ll(.info) { aiproxyLogger.info("AIProxy account changed due to quota violation") }
160+
case NSUbiquitousKeyValueStoreAccountChange: if ll(.info) { aiproxyLogger.info("AIProxy account changed due to icloud account change") }
163161
default:
164162
return
165163
}
@@ -174,12 +172,12 @@ final class AnonymousAccountStorage {
174172
}
175173

176174
guard ukvsAccount != resolvedAccount else {
177-
aiproxyLogger.info("UKVS remote sync is already up to date")
175+
if ll(.info) { aiproxyLogger.info("UKVS remote sync is already up to date") }
178176
return
179177
}
180178

181179
if ukvsAccount.timestamp <= resolvedAccount.timestamp {
182-
aiproxyLogger.info("UKVS account is older than our existing resolved account. Switching to the older account.")
180+
if ll(.info) { aiproxyLogger.info("UKVS account is older than our existing resolved account. Switching to the older account.") }
183181
self.resolvedAccount = ukvsAccount
184182
self.localAccountChain.append(ukvsAccount)
185183
DispatchQueue.main.async {
@@ -189,16 +187,16 @@ final class AnonymousAccountStorage {
189187
Task.detached {
190188
let updateLocal = try? await AIProxyStorage.updateLocalAccountChainInKeychain(self.localAccountChain)
191189
if updateLocal == nil || updateLocal! != noErr {
192-
aiproxyLogger.warning("Could not update the local account chain")
190+
if ll(.warning) { aiproxyLogger.warning("Could not update the local account chain") }
193191
}
194192

195193
let updateRemote = try? await AIProxyStorage.updateRemoteAccountInKeychain(ukvsAccount)
196194
if updateRemote == nil || updateRemote! != noErr {
197-
aiproxyLogger.warning("Could not update the remote account")
195+
if ll(.warning) { aiproxyLogger.warning("Could not update the remote account") }
198196
}
199197
}
200198
} else {
201-
aiproxyLogger.info("UKVS account is newer than our existing resolved account. Updating UKVS to use the older account.")
199+
if ll(.info) { aiproxyLogger.info("UKVS account is newer than our existing resolved account. Updating UKVS to use the older account.") }
202200
try? AIProxyStorage.updateUKVS(resolvedAccount)
203201
}
204202
}

Sources/AIProxy/Anthropic/AnthropicMessageStreamingContentBlockStart.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ internal struct AnthropicMessageStreamingContentBlockStart: Decodable {
1717
guard let chunkJSON = line.dropFirst(6).data(using: .utf8),
1818
let chunk = try? JSONDecoder().decode(Self.self, from: chunkJSON) else
1919
{
20-
aiproxyLogger.warning("Received unexpected JSON from Anthropic: \(line)")
20+
if ll(.warning) { aiproxyLogger.warning("Received unexpected JSON from Anthropic: \(line)") }
2121
return nil
2222
}
2323
return chunk

Sources/AIProxy/Anthropic/AnthropicMessageStreamingDeltaBlock.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ internal struct AnthropicMessageStreamingDeltaBlock: Decodable {
1919
guard let chunkJSON = line.dropFirst(6).data(using: .utf8),
2020
let chunk = try? JSONDecoder().decode(Self.self, from: chunkJSON) else
2121
{
22-
aiproxyLogger.warning("Received unexpected JSON from Anthropic: \(line)")
22+
if ll(.warning) { aiproxyLogger.warning("Received unexpected JSON from Anthropic: \(line)") }
2323
return nil
2424
}
2525
return chunk

0 commit comments

Comments
 (0)