
Commit f3755b4 (parent: d0e3514)

Add DeepSeek VL 7B support from replicate (#112)

File tree: 5 files changed, +117 −2 lines

README.md (+45)

@@ -2564,6 +2564,51 @@

See the full range of controls for generating an image by viewing `ReplicateSDXLFreshInkInputSchema.swift`

### How to call DeepSeek's 7B vision model on Replicate

Add a file called 'my-image.jpg' to your Xcode app assets, then run this snippet:

```swift
import AIProxy

/* Uncomment for BYOK use cases */
// let replicateService = AIProxy.replicateDirectService(
//     unprotectedAPIKey: "your-replicate-key"
// )

/* Uncomment for all other production use cases */
// let replicateService = AIProxy.replicateService(
//     partialKey: "partial-key-from-your-developer-dashboard",
//     serviceURL: "service-url-from-your-developer-dashboard"
// )

guard let image = NSImage(named: "my-image") else {
    print("Could not find an image named 'my-image' in your app assets")
    return
}

guard let imageURL = AIProxy.encodeImageAsURL(image: image, compressionQuality: 0.4) else {
    print("Could not encode image as a data URI")
    return
}

do {
    let input = ReplicateDeepSeekVL7BInputSchema(
        image: imageURL,
        prompt: "What are the colors in this pic?"
    )
    let description = try await replicateService.runDeepSeekVL7B(input: input, secondsToWait: 300)
    print("Done getting descriptions from DeepSeekVL7B: ", description)
} catch AIProxyError.unsuccessfulRequest(let statusCode, let responseBody) {
    print("Received \(statusCode) status code with response body: \(responseBody)")
} catch {
    // You may want to catch additional Foundation errors and pop the appropriate UI
    // to the user. See "How to catch Foundation errors for specific conditions" here:
    // https://www.aiproxy.com/docs/integration-options.html
    print("Could not use DeepSeek vision on Replicate: \(error.localizedDescription)")
}
```
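The input schema added in this commit also accepts an optional `maxNewTokens` value (Replicate's default is 512; see `ReplicateDeepSeekVL7BInputSchema.swift` below). A short sketch, reusing `replicateService` and `imageURL` from the snippet above, that caps the length of the generated description:

```swift
// Sketch: cap the generation length with the optional maxNewTokens
// parameter, which serializes as "max_new_tokens" on the wire.
// Run this inside the same do/catch as the snippet above.
let input = ReplicateDeepSeekVL7BInputSchema(
    image: imageURL,
    maxNewTokens: 128,
    prompt: "List the dominant colors in this image"
)
let description = try await replicateService.runDeepSeekVL7B(input: input, secondsToWait: 300)
```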
### How to call your own models on Replicate.

Sources/AIProxy/AIProxy.swift (+1 −1)

```diff
@@ -8,7 +8,7 @@ import UIKit
 public struct AIProxy {
 
     /// The current sdk version
-    public static let sdkVersion = "0.74.0"
+    public static let sdkVersion = "0.75.0"
 
     /// - Parameters:
     ///   - partialKey: Your partial key is displayed in the AIProxy dashboard when you submit your provider's key.
```

Sources/AIProxy/OpenAI/OpenAIRealtimeSession.swift (+1 −1)

```diff
@@ -23,7 +23,7 @@ open class OpenAIRealtimeSession {
         self.sessionConfiguration = sessionConfiguration
 
         Task {
-            try await self.sendMessage(OpenAIRealtimeSessionUpdate(session: self.sessionConfiguration))
+            await self.sendMessage(OpenAIRealtimeSessionUpdate(session: self.sessionConfiguration))
         }
         self.webSocketTask.resume()
         self.receiveMessage()
```
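Dropping the `try` implies that `sendMessage` is now non-throwing. Its implementation is not part of this diff, but a hypothetical sketch (the signature and body here are assumptions, not the library's actual code) would have to handle send errors internally:

```swift
// Hypothetical sketch, NOT from this commit: a non-throwing sendMessage
// must swallow errors internally rather than propagate them. The real
// method in OpenAIRealtimeSession may differ in signature and body.
func sendMessage<T: Encodable>(_ message: T) async {
    guard let data = try? JSONEncoder().encode(message) else {
        print("Could not encode realtime message")
        return
    }
    do {
        // URLSessionWebSocketTask.send is async throws; log instead of rethrowing
        try await self.webSocketTask.send(.data(data))
    } catch {
        print("Could not send realtime message: \(error.localizedDescription)")
    }
}
```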
Sources/AIProxy/Replicate/ReplicateDeepSeekVL7BInputSchema.swift (new file, +46)

```swift
//
//  ReplicateDeepSeekVL7BInputSchema.swift
//  AIProxy
//
//  Created by Lou Zell on 3/5/25.
//

import Foundation

/// https://replicate.com/deepseek-ai/deepseek-vl-7b-base?output=json
public struct ReplicateDeepSeekVL7BInputSchema: Encodable {
    // Required

    /// Input image
    public let image: URL

    // Optional

    /// Maximum number of tokens to generate
    /// Default: 512
    public let maxNewTokens: Int?

    /// Input prompt
    /// Default: "Describe this image"
    public let prompt: String?

    private enum CodingKeys: String, CodingKey {
        case image
        case maxNewTokens = "max_new_tokens"
        case prompt
    }

    // This memberwise initializer is autogenerated.
    // To regenerate, use `cmd-shift-a` > Generate Memberwise Initializer
    // To format, place the cursor in the initializer's parameter list and use `ctrl-m`
    public init(
        image: URL,
        maxNewTokens: Int? = nil,
        prompt: String? = nil
    ) {
        self.image = image
        self.maxNewTokens = maxNewTokens
        self.prompt = prompt
    }
}
```
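As a quick sanity check (not part of the commit), encoding the schema with Foundation's `JSONEncoder` shows the `CodingKeys` mapping producing Replicate's expected snake_case key; the data URI below is a placeholder, and real requests should use the output of `AIProxy.encodeImageAsURL`:

```swift
import Foundation

// Sketch only: verify that maxNewTokens serializes as "max_new_tokens".
let input = ReplicateDeepSeekVL7BInputSchema(
    image: URL(string: "data:image/jpeg;base64,AAAA")!,  // placeholder data URI
    maxNewTokens: 256,
    prompt: "Describe this image"
)
let encoder = JSONEncoder()
encoder.outputFormatting = [.prettyPrinted, .sortedKeys, .withoutEscapingSlashes]
let json = String(data: try encoder.encode(input), encoding: .utf8)!
print(json)
// {
//   "image" : "data:image/jpeg;base64,AAAA",
//   "max_new_tokens" : 256,
//   "prompt" : "Describe this image"
// }
```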

Sources/AIProxy/Replicate/ReplicateService+Convenience.swift (+24)

The following convenience method is added to `extension ReplicateService`, just above its `// MARK: - Deprecated` section:

```swift
/// Convenience method for running the DeepSeek 7B vision model:
/// https://replicate.com/deepseek-ai/deepseek-vl-7b-base
///
/// In my testing, vision requests completed in less than 2 seconds once the model was warm.
/// Note that the result can take several minutes if the model is cold.
///
/// - Parameters:
///   - input: The input containing the image and prompt that you want DeepSeek VL to inspect
///   - version: The version of the community model to run
///   - secondsToWait: Seconds to wait before raising a timeout error
///
/// - Returns: The generated content
public func runDeepSeekVL7B(
    input: ReplicateDeepSeekVL7BInputSchema,
    version: String = "d1823e6f68cd3d57f2d315b9357dfa85f53817120ae0de8d2b95fbc8e93a1385",
    secondsToWait: UInt
) async throws -> String {
    let prediction: ReplicatePrediction<[String]> = try await self.runCommunityModel(
        version: version,
        input: input,
        secondsToWait: secondsToWait
    )
    return (try await self.getPredictionOutput(prediction)).joined(separator: "")
}
```
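One implementation detail worth noting: the prediction is typed `ReplicatePrediction<[String]>` because Replicate returns this model's text output as an array of chunks, which the method joins with an empty separator. An illustration with made-up values:

```swift
// Illustration only: the raw prediction output is a [String] of text chunks.
let chunks = ["The image shows", " a red apple", " on a table."]

// runDeepSeekVL7B returns them concatenated into a single String:
let text = chunks.joined(separator: "")
print(text)  // "The image shows a red apple on a table."
```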
