
Commit

Merge pull request #169 from kalafus/MacPaw.parameter_update
API: Parameter Update
ingvarus-bc committed Feb 15, 2024
2 parents 1f6d1c0 + ba5cd2b commit 224f4ef
Showing 32 changed files with 1,507 additions and 555 deletions.
32 changes: 16 additions & 16 deletions Demo/DemoChat/Sources/ChatStore.swift
@@ -85,7 +85,7 @@ public final class ChatStore: ObservableObject {
return
}

let weatherFunction = ChatFunctionDeclaration(
let weatherFunction = ChatQuery.ChatCompletionToolParam(function: .init(
name: "getWeatherData",
description: "Get the current weather in a given location",
parameters: .init(
@@ -95,38 +95,38 @@
],
required: ["location"]
)
)
))

let functions = [weatherFunction]

let chatsStream: AsyncThrowingStream<ChatStreamResult, Error> = openAIClient.chatsStream(
query: ChatQuery(
model: model,
messages: conversation.messages.map { message in
Chat(role: message.role, content: message.content)
},
functions: functions
ChatQuery.ChatCompletionMessageParam(role: message.role, content: message.content)!
}, model: model,
tools: functions
)
)

var functionCallName = ""
var functionCallArguments = ""
var functionCalls = [(name: String, argument: String?)]()
for try await partialChatResult in chatsStream {
for choice in partialChatResult.choices {
let existingMessages = conversations[conversationIndex].messages
// Function calls are also streamed, so we need to accumulate.
if let functionCallDelta = choice.delta.functionCall {
if let nameDelta = functionCallDelta.name {
functionCallName += nameDelta
}
if let argumentsDelta = functionCallDelta.arguments {
functionCallArguments += argumentsDelta
choice.delta.toolCalls?.forEach { toolCallDelta in
if let functionCallDelta = toolCallDelta.function {
if let nameDelta = functionCallDelta.name {
functionCalls.append((nameDelta, functionCallDelta.arguments))
}
}
}
var messageText = choice.delta.content ?? ""
if let finishReason = choice.finishReason,
finishReason == "function_call" {
messageText += "Function call: name=\(functionCallName) arguments=\(functionCallArguments)"
finishReason == .toolCalls
{
functionCalls.forEach { (name: String, argument: String?) in
messageText += "Function call: name=\(name) arguments=\(argument ?? "")\n"
}
}
let message = Message(
id: partialChatResult.id,
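For context on the demo change above: function calling now goes through the `tools:` parameter and `ChatQuery.ChatCompletionToolParam`, messages are built with the failable `ChatQuery.ChatCompletionMessageParam` initializer, and tool-call deltas are accumulated from the stream. A minimal sketch of that call shape, using only identifiers visible in this diff; the model constant is an assumption, and the tool definitions are taken as already built (as in the demo):

```swift
import OpenAI

// Minimal sketch of the post-#169 call shape; `openAIClient` is any OpenAIProtocol instance.
// The model constant is an assumption; tools are passed in as built in the demo above.
func streamChat(
    openAIClient: OpenAIProtocol,
    userText: String,
    tools: [ChatQuery.ChatCompletionToolParam]
) async throws {
    // Messages use the failable ChatCompletionMessageParam initializer,
    // and the former `functions:` argument is now `tools:`.
    let query = ChatQuery(
        messages: [ChatQuery.ChatCompletionMessageParam(role: .user, content: userText)!],
        model: .gpt3_5Turbo,          // assumed model constant; the demo passes its own `model`
        tools: tools
    )

    var toolCalls = [(name: String, arguments: String?)]()
    for try await partialChatResult in openAIClient.chatsStream(query: query) {
        for choice in partialChatResult.choices {
            // Tool calls are streamed as deltas and must be accumulated.
            choice.delta.toolCalls?.forEach { toolCallDelta in
                if let function = toolCallDelta.function, let name = function.name {
                    toolCalls.append((name, function.arguments))
                }
            }
            if choice.finishReason == .toolCalls {
                toolCalls.forEach { print("Tool call: \($0.name) \($0.arguments ?? "")") }
            }
        }
    }
}
```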
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/ImageStore.swift
@@ -11,7 +11,7 @@ import OpenAI
public final class ImageStore: ObservableObject {
public var openAIClient: OpenAIProtocol

@Published var images: [ImagesResult.URLResult] = []
@Published var images: [ImagesResult.Image] = []

public init(
openAIClient: OpenAIProtocol
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/MiscStore.swift
@@ -51,7 +51,7 @@ public final class MiscStore: ObservableObject {
do {
let response = try await openAIClient.moderations(
query: ModerationsQuery(
input: message.content,
input: .init(message.content),
model: .textModerationLatest
)
)
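The moderation change above is small but breaking: `ModerationsQuery.input` is now initialized from the string rather than being a plain `String`. A minimal sketch of the updated call, based on the lines shown here (printing the response is just a placeholder, since the result shape is not part of this diff):

```swift
import OpenAI

// Sketch of the post-#169 moderation call; `openAIClient` is any OpenAIProtocol instance.
func moderate(openAIClient: OpenAIProtocol, text: String) async throws {
    let response = try await openAIClient.moderations(
        query: ModerationsQuery(
            input: .init(text),            // input is now a wrapper type built from the String
            model: .textModerationLatest
        )
    )
    print(response)                        // the result shape is not part of this diff
}
```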
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/Models/Message.swift
@@ -10,7 +10,7 @@ import OpenAI

struct Message {
var id: String
var role: Chat.Role
var role: ChatQuery.ChatCompletionMessageParam.Role
var content: String
var createdAt: Date
}
7 changes: 4 additions & 3 deletions Demo/DemoChat/Sources/SpeechStore.swift
@@ -30,15 +30,16 @@ public final class SpeechStore: ObservableObject {

@MainActor
func createSpeech(_ query: AudioSpeechQuery) async {
guard let input = query.input, !input.isEmpty else { return }
let input = query.input
guard !input.isEmpty else { return }
do {
let response = try await openAIClient.audioCreateSpeech(query: query)
guard let data = response.audioData else { return }
let data = response.audio
let player = try? AVAudioPlayer(data: data)
let audioObject = AudioObject(prompt: input,
audioPlayer: player,
originResponse: response,
format: query.responseFormat.rawValue)
format: query.responseFormat?.rawValue ?? AudioSpeechQuery.AudioSpeechResponseFormat.mp3.rawValue)
audioObjects.append(audioObject)
} catch {
print(error.localizedDescription)
4 changes: 2 additions & 2 deletions Demo/DemoChat/Sources/UI/DetailView.swift
@@ -199,7 +199,7 @@ struct ChatBubble: View {
.foregroundColor(userForegroundColor)
.background(userBackgroundColor)
.clipShape(RoundedRectangle(cornerRadius: 16, style: .continuous))
case .function:
case .tool:
Text(message.content)
.font(.footnote.monospaced())
.padding(.horizontal, 16)
@@ -223,7 +223,7 @@ struct DetailView_Previews: PreviewProvider {
Message(id: "1", role: .assistant, content: "Hello, how can I help you today?", createdAt: Date(timeIntervalSinceReferenceDate: 0)),
Message(id: "2", role: .user, content: "I need help with my subscription.", createdAt: Date(timeIntervalSinceReferenceDate: 100)),
Message(id: "3", role: .assistant, content: "Sure, what seems to be the problem with your subscription?", createdAt: Date(timeIntervalSinceReferenceDate: 200)),
Message(id: "4", role: .function, content:
Message(id: "4", role: .tool, content:
"""
get_current_weather({
"location": "Glasgow, Scotland",
10 changes: 5 additions & 5 deletions Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift
@@ -13,9 +13,9 @@ public struct ImageCreationView: View {

@State private var prompt: String = ""
@State private var n: Int = 1
@State private var size: String
private var sizes = ["256x256", "512x512", "1024x1024"]
@State private var size = ImagesQuery.Size.allCases.first!

private var sizes = ImagesQuery.Size.allCases

public init(store: ImageStore) {
self.store = store
@@ -37,7 +37,7 @@
HStack {
Picker("Size", selection: $size) {
ForEach(sizes, id: \.self) {
Text($0)
Text($0.rawValue)
}
}
}
@@ -56,7 +56,7 @@
}
if !$store.images.isEmpty {
Section("Images") {
ForEach($store.images, id: \.self) { image in
ForEach($store.images, id: \.url) { image in
let urlString = image.wrappedValue.url ?? ""
if let imageURL = URL(string: urlString), UIApplication.shared.canOpenURL(imageURL) {
LinkPreview(previewURL: imageURL)
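Two smaller API shifts show up in this view: sizes come from the `ImagesQuery.Size` enum rather than hard-coded strings, and results are `[ImagesResult.Image]` whose `url` is an optional `String`. A short sketch of the same pattern outside SwiftUI, using only identifiers from this diff:

```swift
import Foundation
import OpenAI

// Size options now come from the library enum; rawValue is what the picker displays.
let sizes = ImagesQuery.Size.allCases
let selectedSize = sizes.first!            // the demo defaults to the first case
print(sizes.map(\.rawValue))

// Results are [ImagesResult.Image]; url is an optional String that still needs parsing.
func firstImageURL(in images: [ImagesResult.Image]) -> URL? {
    guard let urlString = images.first?.url else { return nil }
    return URL(string: urlString)
}
```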
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift
@@ -12,7 +12,7 @@ public struct ListModelsView: View {

public var body: some View {
NavigationStack {
List($store.availableModels) { row in
List($store.availableModels.wrappedValue, id: \.id) { row in
Text(row.id)
}
.listStyle(.insetGrouped)
4 changes: 2 additions & 2 deletions Demo/DemoChat/Sources/UI/TextToSpeechView.swift
@@ -101,7 +101,7 @@ public struct TextToSpeechView: View {
}
if !$store.audioObjects.wrappedValue.isEmpty {
Section("Click to play, swipe to save:") {
ForEach(store.audioObjects) { object in
ForEach(store.audioObjects, id: \.id) { object in
HStack {
Text(object.prompt.capitalized)
Spacer()
@@ -122,7 +122,7 @@
}
.swipeActions(edge: .trailing, allowsFullSwipe: false) {
Button {
presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)")
presentUserDirectoryDocumentPicker(for: object.originResponse.audio, filename: "GeneratedAudio.\(object.format)")
} label: {
Image(systemName: "square.and.arrow.down")
}
2 changes: 1 addition & 1 deletion Sources/OpenAI/OpenAI.swift
@@ -182,7 +182,7 @@ extension OpenAI {
return completion(.failure(OpenAIError.emptyData))
}

completion(.success(AudioSpeechResult(audioData: data)))
completion(.success(AudioSpeechResult(audio: data)))
}
task.resume()
} catch {
17 changes: 11 additions & 6 deletions Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
@@ -7,8 +7,9 @@

import Foundation

/// Generates audio from the input text.
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechQuery: Codable, Equatable {
public struct AudioSpeechQuery: Codable {

/// Encapsulates the voices available for audio generation.
///
@@ -36,15 +36,19 @@ public struct AudioSpeechQuery: Codable, Equatable {
case aac
case flac
}

/// The text to generate audio for. The maximum length is 4096 characters.
public let input: String
/// One of the available TTS models: tts-1 or tts-1-hd
public let model: Model
/// The text to generate audio for. The maximum length is 4096 characters.
public let input: String?
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide.
/// https://platform.openai.com/docs/guides/text-to-speech/voice-options
public let voice: AudioSpeechVoice
/// The format to audio in. Supported formats are mp3, opus, aac, and flac.
public let responseFormat: AudioSpeechResponseFormat
/// The speed of the generated audio. Enter a value between **0.25** and **4.0**. Default: **1.0**
/// Defaults to mp3
public let responseFormat: AudioSpeechResponseFormat?
/// The speed of the generated audio. Select a value from **0.25** to **4.0**. **1.0** is the default.
/// Defaults to 1
public let speed: String?

public enum CodingKeys: String, CodingKey {
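With this update `input` is required, `responseFormat` becomes optional (the demo falls back to mp3 when it is nil), and the result exposes a non-optional `audio: Data`. A minimal sketch of generating speech; the memberwise initializer layout, the `.tts_1` model constant, and the `.alloy` voice case are assumptions consistent with the declarations and doc comments above:

```swift
import Foundation
import OpenAI

// Sketch of text-to-speech after the parameter update; `openAIClient` is any OpenAIProtocol instance.
func speech(openAIClient: OpenAIProtocol, text: String) async throws -> Data {
    let query = AudioSpeechQuery(
        model: .tts_1,               // assumed model constant (tts-1 / tts-1-hd per the doc comment)
        input: text,                 // no longer optional
        voice: .alloy,               // assumed case of AudioSpeechVoice
        responseFormat: .mp3,        // optional; the demo defaults to mp3
        speed: nil                   // "1.0" is the default
    )
    let result = try await openAIClient.audioCreateSpeech(query: query)
    return result.audio              // audio is now a non-optional Data
}
```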
8 changes: 5 additions & 3 deletions Sources/OpenAI/Public/Models/AudioSpeechResult.swift
@@ -7,8 +7,10 @@

import Foundation

public struct AudioSpeechResult {

/// The audio file content.
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechResult: Codable, Equatable {

/// Audio data for one of the following formats :`mp3`, `opus`, `aac`, `flac`
public let audioData: Data?
public let audio: Data
}
65 changes: 55 additions & 10 deletions Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift
@@ -7,42 +7,87 @@

import Foundation

public enum AudioResponseFormat: String, Codable, Equatable {
public struct AudioTranscriptionQuery: Codable {

public enum ResponseFormat: String, Codable, Equatable, CaseIterable {
case json
case text
case verboseJson = "verbose_json"
case srt
case vtt
}

public struct AudioTranscriptionQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

/// The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
public let file: Data
public let fileName: String
public let fileType: Self.FileType
/// ID of the model to use. Only whisper-1 is currently available.
public let model: Model
/// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
/// Defaults to json
public let responseFormat: Self.ResponseFormat?

/// An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
public let prompt: String?
/// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
/// Defaults to 0
public let temperature: Double?
/// The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
/// https://platform.openai.com/docs/guides/speech-to-text/prompting
public let language: String?
public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {

public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.fileType = fileType
self.model = model
self.prompt = prompt
self.temperature = temperature
self.language = language
self.responseFormat = responseFormat
}

public enum FileType: String, Codable, Equatable, CaseIterable {
case flac
case mp3, mpga
case mp4, m4a
case mpeg
case ogg
case wav
case webm

var fileName: String { get {
var fileName = "speech."
switch self {
case .mpga:
fileName += Self.mp3.rawValue
case .m4a:
fileName += Self.mp4.rawValue
default:
fileName += self.rawValue
}

return fileName
}}

var contentType: String { get {
var contentType = "audio/"
switch self {
case .mpga:
contentType += Self.mp3.rawValue
case .m4a:
contentType += Self.mp4.rawValue
default:
contentType += self.rawValue
}

return contentType
}}
}
}

extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable {

func encode(boundary: String) -> Data {
let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "temperature", value: temperature),
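The breaking change here is that callers now pass a `fileType` instead of a free-form `fileName`; the enum derives both the multipart file name (e.g. `speech.mp4` for `.m4a`) and the content type. The initializer below matches the one added in this diff; the `audioTranscriptions(query:)` client method and the `.whisper_1` model constant are assumptions:

```swift
import Foundation
import OpenAI

// Sketch of the post-#169 transcription call; `openAIClient` is any OpenAIProtocol instance.
func transcribe(openAIClient: OpenAIProtocol, audio: Data) async throws -> String {
    let query = AudioTranscriptionQuery(
        file: audio,
        fileType: .m4a,              // multipart name/content type are derived: "speech.mp4", "audio/mp4"
        model: .whisper_1,           // assumed model constant
        language: "en"               // optional ISO-639-1 hint
    )
    let result = try await openAIClient.audioTranscriptions(query: query)   // assumed client method name
    return result.text
}
```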
3 changes: 2 additions & 1 deletion Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift
@@ -8,6 +8,7 @@
import Foundation

public struct AudioTranscriptionResult: Codable, Equatable {


/// The transcribed text.
public let text: String
}
25 changes: 17 additions & 8 deletions Sources/OpenAI/Public/Models/AudioTranslationQuery.swift
@@ -7,20 +7,29 @@

import Foundation

public struct AudioTranslationQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

/// Translates audio into English.
public struct AudioTranslationQuery: Codable {
public typealias FileType = AudioTranscriptionQuery.FileType
public typealias ResponseFormat = AudioTranscriptionQuery.ResponseFormat

/// The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
public let file: Data
public let fileName: String
public let fileType: Self.FileType
/// ID of the model to use. Only whisper-1 is currently available.
public let model: Model

/// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
/// Defaults to json
public let responseFormat: Self.ResponseFormat?
/// An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.
/// https://platform.openai.com/docs/guides/speech-to-text/prompting
public let prompt: String?
/// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
/// Defaults to 0
public let temperature: Double?

public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.fileType = fileType
self.model = model
self.prompt = prompt
self.temperature = temperature
@@ -32,7 +41,7 @@ extension AudioTranslationQuery: MultipartFormDataBodyEncodable {

func encode(boundary: String) -> Data {
let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "response_format", value: responseFormat),
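AudioTranslationQuery mirrors the transcription change: `fileName` is replaced by the shared `FileType`, and its `ResponseFormat` is now a typealias of the transcription enum. A query built against the new initializer might look like this; the audio bytes are a placeholder and the model constant is an assumption, and the query is submitted through the corresponding client method (not shown in this hunk):

```swift
import Foundation
import OpenAI

// Translation now shares FileType and ResponseFormat with AudioTranscriptionQuery.
let translationQuery = AudioTranslationQuery(
    file: Data(),                    // your audio bytes
    fileType: .mp3,
    model: .whisper_1,               // assumed model constant
    prompt: nil,
    temperature: nil,
    responseFormat: .verboseJson
)
```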
