DeepSeek R1 usage improvements (#101)

lzell · web-flow · commit 047861de20f2 · 2025-02-06T17:06:06.000-08:00
diff --git a/README.md b/README.md
diff --git a/Sources/AIProxy/DeepSeek/DeepSeekDirectService.swift b/Sources/AIProxy/DeepSeek/DeepSeekDirectService.swift
@@ -23,15 +23,17 @@ open class DeepSeekDirectService: DeepSeekService, DirectService {
     /// - Parameters:
     ///   - body: The request body to send to DeepSeek. See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: The chat response. See this reference:
     ///            https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func chatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> DeepSeekChatCompletionResponseBody {
         var body = body
         body.stream = false
         body.streamOptions = nil
-        let request = try AIProxyURLRequest.createDirect(
+        var request = try AIProxyURLRequest.createDirect(
             baseURL: "https://api.deepseek.com",
             path: "/chat/completions",
             body: try body.serialize(),
@@ -42,6 +44,7 @@ open class DeepSeekDirectService: DeepSeekService, DirectService {
                 "Accept": "application/json"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeResponse(request)
     }
 
@@ -50,15 +53,17 @@ open class DeepSeekDirectService: DeepSeekService, DirectService {
     /// - Parameters:
     ///   - body: The request body to send to DeepSeek.  See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: An async sequence of completion chunks. See the 'Streaming' tab here:
     ///           https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func streamingChatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, DeepSeekChatCompletionChunk> {
         var body = body
         body.stream = true
         body.streamOptions = .init(includeUsage: true)
-        let request = try AIProxyURLRequest.createDirect(
+        var request = try AIProxyURLRequest.createDirect(
             baseURL: "https://api.deepseek.com",
             path: "/chat/completions",
             body: try body.serialize(),
@@ -69,6 +74,7 @@ open class DeepSeekDirectService: DeepSeekService, DirectService {
                 "Accept": "application/json"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeStreamingChunks(request)
     }
 }
diff --git a/Sources/AIProxy/DeepSeek/DeepSeekProxiedService.swift b/Sources/AIProxy/DeepSeek/DeepSeekProxiedService.swift
@@ -29,15 +29,17 @@ open class DeepSeekProxiedService: DeepSeekService, ProxiedService {
     /// - Parameters:
     ///   - body: The request body to send to DeepSeek, protected through AIProxy. See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: The chat response. See this reference:
     ///            https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func chatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> DeepSeekChatCompletionResponseBody {
         var body = body
         body.stream = false
         body.streamOptions = nil
-        let request = try await AIProxyURLRequest.create(
+        var request = try await AIProxyURLRequest.create(
             partialKey: self.partialKey,
             serviceURL: self.serviceURL,
             clientID: self.clientID,
@@ -49,23 +51,26 @@ open class DeepSeekProxiedService: DeepSeekService, ProxiedService {
                 "Accept": "application/json"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeResponse(request)
     }
 
     /// Initiates a streaming chat completion request to /chat/completions.
     ///
     /// - Parameters:
-    ///   - body: The request body to send to DeepSeek.  See this reference:
+    ///   - body: The request body to send to DeepSeek, protected through AIProxy.  See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: An async sequence of completion chunks. See the 'Streaming' tab here:
     ///           https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func streamingChatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, DeepSeekChatCompletionChunk> {
         var body = body
         body.stream = true
         body.streamOptions = .init(includeUsage: true)
-        let request = try await AIProxyURLRequest.create(
+        var request = try await AIProxyURLRequest.create(
             partialKey: self.partialKey,
             serviceURL: self.serviceURL,
             clientID: self.clientID,
@@ -77,6 +82,7 @@ open class DeepSeekProxiedService: DeepSeekService, ProxiedService {
                 "Accept": "application/json"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeStreamingChunks(request)
     }
 }
diff --git a/Sources/AIProxy/DeepSeek/DeepSeekService.swift b/Sources/AIProxy/DeepSeek/DeepSeekService.swift
@@ -14,20 +14,38 @@ public protocol DeepSeekService {
     /// - Parameters:
     ///   - body: The request body to send to DeepSeek. See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: The chat response. See this reference:
     ///            https://api-docs.deepseek.com/api/create-chat-completion#responses
     func chatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> DeepSeekChatCompletionResponseBody
 
     /// Initiates a streaming chat completion request to /chat/completions.
     ///
     /// - Parameters:
     ///   - body: The request body to send to DeepSeek.  See this reference:
     ///           https://api-docs.deepseek.com/api/create-chat-completion
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: An async sequence of completion chunks. See the 'Streaming' tab here:
     ///           https://api-docs.deepseek.com/api/create-chat-completion#responses
     func streamingChatCompletionRequest(
-        body: DeepSeekChatCompletionRequestBody
+        body: DeepSeekChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, DeepSeekChatCompletionChunk>
 }
+
+extension DeepSeekService {
+    /// Forwards to `chatCompletionRequest(body:secondsToWait:)` with `secondsToWait` set to 60.
+    public func chatCompletionRequest(body: DeepSeekChatCompletionRequestBody) async throws -> DeepSeekChatCompletionResponseBody {
+        try await chatCompletionRequest(body: body, secondsToWait: 60)
+    }
+
+    /// Forwards to `streamingChatCompletionRequest(body:secondsToWait:)` with `secondsToWait` set to 60.
+    public func streamingChatCompletionRequest(
+        body: DeepSeekChatCompletionRequestBody
+    ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, DeepSeekChatCompletionChunk> {
+        try await streamingChatCompletionRequest(body: body, secondsToWait: 60)
+    }
+}
diff --git a/Sources/AIProxy/FireworksAI/FireworksAIProxiedService.swift b/Sources/AIProxy/FireworksAI/FireworksAIProxiedService.swift
@@ -31,7 +31,7 @@ open class FireworksAIProxiedService: FireworksAIService, ProxiedService {
     ///   - body: The request body to send to FireworksAI. See these references:
     ///           https://fireworks.ai/models/fireworks/deepseek-r1
     ///           https://api-docs.deepseek.com/api/create-chat-completion
-    ///   - secondsToWait: The number of seconds to wait before timing out
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: The chat response. See this reference:
     ///            https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func deepSeekR1Request(
@@ -63,7 +63,7 @@ open class FireworksAIProxiedService: FireworksAIService, ProxiedService {
     ///   - body: The request body to send to FireworksAI.  See these references:
     ///           https://fireworks.ai/models/fireworks/deepseek-r1
     ///           https://api-docs.deepseek.com/api/create-chat-completion
-    ///   - secondsToWait: The number of seconds to wait before timing out
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     /// - Returns: An async sequence of completion chunks. See the 'Streaming' tab here:
     ///           https://api-docs.deepseek.com/api/create-chat-completion#responses
     public func streamingDeepSeekR1Request(
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionChunk.swift b/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionChunk.swift
@@ -39,6 +39,11 @@ extension OpenRouterChatCompletionChunk {
 extension OpenRouterChatCompletionChunk.Choice {
     public struct Delta: Codable {
         public let role: String
-        public let content: String
+
+        /// Output content. For reasoning models, these chunks arrive after `reasoning` has finished.
+        public let content: String?
+
+        /// Reasoning content. For reasoning models, these chunks arrive before `content`.
+        public let reasoning: String?
     }
 }
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionRequestBody.swift b/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionRequestBody.swift
@@ -25,6 +25,9 @@ public struct OpenRouterChatCompletionRequestBody: Encodable {
     /// Defaults to 0
     public let frequencyPenalty: Double?
 
+    /// Include reasoning content in the response. Useful to understand how a reasoning model arrives at its response.
+    public let includeReasoning: Bool?
+
     /// Modify the likelihood of specified tokens appearing in the completion.
     /// Accepts an object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
     public let logitBias: [String: Double]?
@@ -157,6 +160,7 @@ public struct OpenRouterChatCompletionRequestBody: Encodable {
 
         // optional
         case frequencyPenalty = "frequency_penalty"
+        case includeReasoning = "include_reasoning"
         case logitBias = "logit_bias"
         case logprobs
         case maxTokens = "max_tokens"
@@ -192,6 +196,7 @@ public struct OpenRouterChatCompletionRequestBody: Encodable {
     public init(
         messages: [OpenRouterChatCompletionRequestBody.Message],
         frequencyPenalty: Double? = nil,
+        includeReasoning: Bool? = nil,
         logitBias: [String : Double]? = nil,
         logprobs: Bool? = nil,
         maxTokens: Int? = nil,
@@ -222,6 +227,7 @@ public struct OpenRouterChatCompletionRequestBody: Encodable {
     ) {
         self.messages = messages
         self.frequencyPenalty = frequencyPenalty
+        self.includeReasoning = includeReasoning
         self.logitBias = logitBias
         self.logprobs = logprobs
         self.maxTokens = maxTokens
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionResponseBody.swift b/Sources/AIProxy/OpenRouter/OpenRouterChatCompletionResponseBody.swift
@@ -78,6 +78,9 @@ extension OpenRouterChatCompletionResponseBody.Choice {
         /// The contents of the message.
         public let content: String?
 
+        /// Reasoning models such as R1 will populate this field with the reasoning used to arrive at `content`
+        public let reasoning: String?
+
         /// The role of the author of this message.
         public let role: String
 
@@ -86,6 +89,7 @@ extension OpenRouterChatCompletionResponseBody.Choice {
 
         private enum CodingKeys: String, CodingKey {
             case content
+            case reasoning
             case role
             case toolCalls = "tool_calls"
         }
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterDirectService.swift b/Sources/AIProxy/OpenRouter/OpenRouterDirectService.swift
@@ -21,16 +21,18 @@ open class OpenRouterDirectService: OpenRouterService, DirectService {
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     public func chatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> OpenRouterChatCompletionResponseBody {
         var body = body
         body.stream = false
         body.streamOptions = nil
-        let request = try AIProxyURLRequest.createDirect(
+        var request = try AIProxyURLRequest.createDirect(
             baseURL: "https://openrouter.ai",
             path: "/api/v1/chat/completions",
             body: body.serialize(),
@@ -40,6 +42,7 @@ open class OpenRouterDirectService: OpenRouterService, DirectService {
                 "Authorization": "Bearer \(self.unprotectedAPIKey)"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeResponse(request)
     }
 
@@ -48,16 +51,18 @@ open class OpenRouterDirectService: OpenRouterService, DirectService {
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     public func streamingChatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, OpenRouterChatCompletionChunk> {
         var body = body
         body.stream = true
         body.streamOptions = .init(includeUsage: true)
-        let request = try AIProxyURLRequest.createDirect(
+        var request = try AIProxyURLRequest.createDirect(
             baseURL: "https://openrouter.ai",
             path: "/api/v1/chat/completions",
             body: try body.serialize(),
@@ -67,6 +72,7 @@ open class OpenRouterDirectService: OpenRouterService, DirectService {
                 "Authorization": "Bearer \(self.unprotectedAPIKey)"
             ]
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeStreamingChunks(request)
     }
 }
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterProxiedService.swift b/Sources/AIProxy/OpenRouter/OpenRouterProxiedService.swift
@@ -29,16 +29,18 @@ open class OpenRouterProxiedService: OpenRouterService, ProxiedService {
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     public func chatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> OpenRouterChatCompletionResponseBody {
         var body = body
         body.stream = false
         body.streamOptions = nil
-        let request = try await AIProxyURLRequest.create(
+        var request = try await AIProxyURLRequest.create(
             partialKey: self.partialKey,
             serviceURL: self.serviceURL,
             clientID: self.clientID,
@@ -47,6 +49,7 @@ open class OpenRouterProxiedService: OpenRouterService, ProxiedService {
             verb: .post,
             contentType: "application/json"
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeResponse(request)
     }
 
@@ -55,16 +58,18 @@ open class OpenRouterProxiedService: OpenRouterService, ProxiedService {
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     public func streamingChatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, OpenRouterChatCompletionChunk> {
         var body = body
         body.stream = true
         body.streamOptions = .init(includeUsage: true)
-        let request = try await AIProxyURLRequest.create(
+        var request = try await AIProxyURLRequest.create(
             partialKey: self.partialKey,
             serviceURL: self.serviceURL,
             clientID: self.clientID,
@@ -73,6 +78,7 @@ open class OpenRouterProxiedService: OpenRouterService, ProxiedService {
             verb: .post,
             contentType: "application/json"
         )
+        request.timeoutInterval = TimeInterval(secondsToWait)
         return try await self.makeRequestAndDeserializeStreamingChunks(request)
     }
 }
diff --git a/Sources/AIProxy/OpenRouter/OpenRouterService.swift b/Sources/AIProxy/OpenRouter/OpenRouterService.swift
@@ -14,22 +14,40 @@ public protocol OpenRouterService {
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     func chatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> OpenRouterChatCompletionResponseBody
 
     /// Initiates a streaming chat completion request to /api/v1/chat/completions.
     ///
     /// - Parameters:
     ///   - body: The request body to send to OpenRouter through AIProxy. See this reference:
     ///   https://openrouter.ai/docs/requests
+    ///   - secondsToWait: The number of seconds to wait before `URLError.timedOut` is raised
     ///
     /// - Returns: The response body from OpenRouter. See this reference:
     ///            https://openrouter.ai/docs/responses
     func streamingChatCompletionRequest(
-        body: OpenRouterChatCompletionRequestBody
+        body: OpenRouterChatCompletionRequestBody,
+        secondsToWait: Int
     ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, OpenRouterChatCompletionChunk>
 }
+
+extension OpenRouterService {
+    /// Forwards to `chatCompletionRequest(body:secondsToWait:)` with `secondsToWait` set to 60.
+    public func chatCompletionRequest(body: OpenRouterChatCompletionRequestBody) async throws -> OpenRouterChatCompletionResponseBody {
+        try await chatCompletionRequest(body: body, secondsToWait: 60)
+    }
+
+    /// Forwards to `streamingChatCompletionRequest(body:secondsToWait:)` with `secondsToWait` set to 60.
+    public func streamingChatCompletionRequest(
+        body: OpenRouterChatCompletionRequestBody
+    ) async throws -> AsyncCompactMapSequence<AsyncLineSequence<URLSession.AsyncBytes>, OpenRouterChatCompletionChunk> {
+        try await streamingChatCompletionRequest(body: body, secondsToWait: 60)
+    }
+}
diff --git a/Tests/AIProxyTests/OpenAIChatCompletionRequestTests.swift b/Tests/AIProxyTests/OpenAIChatCompletionRequestTests.swift
diff --git a/Tests/AIProxyTests/OpenRouterChatCompletionStreamingChunkTests.swift b/Tests/AIProxyTests/OpenRouterChatCompletionStreamingChunkTests.swift
diff --git a/Tests/AIProxyTests/ReplicateSyncAPIResponseBodyTests.swift b/Tests/AIProxyTests/ReplicateSyncAPIResponseBodyTests.swift

Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,11 @@ extension OpenRouterChatCompletionChunk {`
`39`	`39`	`extension OpenRouterChatCompletionChunk.Choice {`
`40`	`40`	`public struct Delta: Codable {`
`41`	`41`	`public let role: String`
`42`		`- public let content: String`
	`42`	`+`
	`43`	+ /// Output content. For reasoning models, these chunks arrive after `reasoning` has finished.
	`44`	`+ public let content: String?`
	`45`	`+`
	`46`	+ /// Reasoning content. For reasoning models, these chunks arrive before `content`.
	`47`	`+ public let reasoning: String?`
`43`	`48`	`}`
`44`	`49`	`}`