Skip to content
51 changes: 31 additions & 20 deletions packages/sdk/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,12 @@ const model = await modelRegistryGetModel(registryPath, registrySource);
Build custom model integrations with the new plugin architecture. Plugins support both request/reply and streaming patterns.

```typescript
import { invokePlugin, invokePluginStream, definePlugin, defineHandler } from "@qvac/sdk";
import {
invokePlugin,
invokePluginStream,
definePlugin,
defineHandler,
} from "@qvac/sdk";

// Invoke a plugin handler
const result = await invokePlugin<MyResponse>({
Expand Down Expand Up @@ -157,7 +162,7 @@ const modelId = await loadModel({
ttsLatentDenoiserSrc,
ttsVoiceDecoderSrc,
ttsVoiceSrc,
ttsSpeed: 1.0, // Playback speed
ttsSpeed: 1.0, // Playback speed
ttsNumInferenceSteps: 5, // Quality vs speed tradeoff
},
});
Expand Down Expand Up @@ -198,7 +203,7 @@ Map between engine names and addon types:
import { resolveCanonicalEngine, getAddonFromEngine } from "@qvac/sdk";

const engine = resolveCanonicalEngine("@qvac/llm-llamacpp"); // "llamacpp-completion"
const addon = getAddonFromEngine("llamacpp-completion"); // "llm"
const addon = getAddonFromEngine("llamacpp-completion"); // "llm"
```

---
Expand Down Expand Up @@ -300,6 +305,7 @@ await ragSaveEmbeddings({
```

Other RAG changes:

- `ragSaveEmbeddings` no longer returns `droppedIndices`
- `ragDeleteEmbeddings` now returns `void` instead of `boolean` (throws on failure)
- `ragDeleteEmbeddings` no longer requires `modelId` (uses cached workspace)
Expand Down Expand Up @@ -365,7 +371,7 @@ const modelId = await loadModel({
modelSrc: MARIAN_OPUS_EN_IT_Q0F32,
modelType: "nmt",
modelConfig: {
engine: "Opus", // Required: "Opus" | "Bergamot" | "IndicTrans"
engine: "Opus", // Required: "Opus" | "Bergamot" | "IndicTrans"
from: "en",
to: "it",
},
Expand All @@ -390,7 +396,7 @@ const modelId = await loadModel({
engine: "Bergamot",
from: "en",
to: "fr",
normalize: 1, // Bergamot-specific option
normalize: 1, // Bergamot-specific option
},
});

Expand Down Expand Up @@ -433,7 +439,11 @@ const result = await blocks;
await done;

// Or stream blocks as they're detected
const { blockStream, done } = ocr({ modelId, image: imageBuffer, stream: true });
const { blockStream, done } = ocr({
modelId,
image: imageBuffer,
stream: true,
});
for await (const blocks of blockStream) {
console.log(blocks);
// [{ text: "Hello", bbox: [10, 20, 100, 50], confidence: 0.95 }]
Expand All @@ -447,7 +457,8 @@ Load large models split across multiple files, from URLs or archives.
```typescript
// Pattern-based sharded URLs (auto-detects shard pattern)
await loadModel({
modelSrc: "https://huggingface.co/user/model/resolve/main/model-00001-of-00003.gguf",
modelSrc:
"https://huggingface.co/user/model/resolve/main/model-00001-of-00003.gguf",
modelType: "llm",
});

Expand Down Expand Up @@ -568,10 +579,10 @@ The SDK searches for configuration in this order:

#### Supported Formats

| Format | Filename | Notes |
| ---------- | ------------------ | ---------------------------- |
| JSON | `qvac.config.json` | Simplest option |
| JavaScript | `qvac.config.js` | Use `export default` |
| Format | Filename | Notes |
| ---------- | ------------------ | ----------------------------- |
| JSON | `qvac.config.json` | Simplest option |
| JavaScript | `qvac.config.js` | Use `export default` |
| TypeScript | `qvac.config.ts` | Fully typed with `QvacConfig` |

**TypeScript example:**
Expand All @@ -592,7 +603,7 @@ export default config;

1. Remove all `setConfig()` calls from your code
2. Create a config file in your project root
3. *(Optional)* For non-standard locations, set `QVAC_CONFIG_PATH` before importing the SDK
3. _(Optional)_ For non-standard locations, set `QVAC_CONFIG_PATH` before importing the SDK

---

Expand All @@ -602,14 +613,14 @@ Some model constants have been renamed for clarity, and duplicate constants have

**Changes:**

| Before | After |
| -------------------------- | ------------------------------------------------- |
| `WHISPER_SMALL` | `WHISPER_SMALL_Q8` |
| `WHISPER_NORWEGIAN_TINY_1` | *(removed — use `WHISPER_NORWEGIAN_TINY`)* |
| `WHISPER_TINY_SILERO` | *(removed — use `WHISPER_TINY`)* |
| `MARIAN_OPUS_EN_FR_Q4_0_1` | *(removed — use `MARIAN_OPUS_EN_FR_Q4_0`)* |
| `MARIAN_OPUS_FR_EN_Q4_0_1` | *(removed — use `MARIAN_OPUS_FR_EN_Q4_0`)* |
| `MARIAN_OPUS_IT_EN` | *(removed — use `MARIAN_OPUS_EN_IT`)* |
| Before | After |
| -------------------------- | ------------------------------------------ |
| `WHISPER_SMALL` | `WHISPER_SMALL_Q8` |
| `WHISPER_NORWEGIAN_TINY_1` | _(removed — use `WHISPER_NORWEGIAN_TINY`)_ |
| `WHISPER_TINY_SILERO` | _(removed — use `WHISPER_TINY`)_ |
| `MARIAN_OPUS_EN_FR_Q4_0_1` | _(removed — use `MARIAN_OPUS_EN_FR_Q4_0`)_ |
| `MARIAN_OPUS_FR_EN_Q4_0_1` | _(removed — use `MARIAN_OPUS_FR_EN_Q4_0`)_ |
| `MARIAN_OPUS_IT_EN` | _(removed — use `MARIAN_OPUS_EN_IT`)_ |

All model metadata and hyperdrive keys remain unchanged—only the constant names were affected.

Expand Down
8 changes: 7 additions & 1 deletion packages/sdk/client/api/completion-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
type Tool,
type ToolCallEvent,
type ToolCallWithCall,
type RPCOptions,
} from "@/schemas";
import { getMcpToolsWithHandlers } from "@/utils/mcp-adapter";
import {
Expand All @@ -23,6 +24,7 @@ const logger = getClientLogger();
/**
 * Parameter object accepted by `completion()`.
 *
 * Built from `CompletionClientParams` with its own `tools` field omitted and
 * redeclared below, plus the additional `mcp` and `rpcOptions` fields.
 */
type CompletionParams = Omit<CompletionClientParams, "tools"> & {
// Tools may be supplied either as `Tool` entries or as `ToolInput` entries.
tools?: Tool[] | ToolInput[];
// NOTE(review): presumably MCP clients whose tools are resolved through
// `getMcpToolsWithHandlers` — confirm against the body of `completion()`.
mcp?: McpClientInput[];
// Per-call RPC options; `completion()` forwards this value as the third
// argument to `streamRpc`.
rpcOptions?: RPCOptions;
};

/**
Expand Down Expand Up @@ -154,7 +156,11 @@ export function completion(params: CompletionParams): {
stream: params.stream ?? true,
};

const responses: AsyncGenerator<unknown> = streamRpc(request);
const responses: AsyncGenerator<unknown> = streamRpc(
request,
undefined,
params.rpcOptions,
);

for await (const response of responses) {
if (
Expand Down
27 changes: 17 additions & 10 deletions packages/sdk/client/api/embed.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { send } from "@/client/rpc/rpc-client";
import { type EmbedParams, type EmbedRequest } from "@/schemas";
import {
type EmbedParams,
type EmbedRequest,
type RPCOptions,
} from "@/schemas";
import { InvalidResponseError } from "@/utils/errors-client";

/**
Expand All @@ -8,35 +12,38 @@ import { InvalidResponseError } from "@/utils/errors-client";
* @param params - The parameters for the embedding
* @param params.modelId - The identifier of the embedding model to use
* @param params.text - The input text to embed
* @param options - Optional RPC options including per-call profiling
* @throws {QvacErrorBase} When the response type is invalid or when the embedding fails
*/
export async function embed(params: {
modelId: string;
text: string;
}): Promise<number[]>;
export async function embed(
params: { modelId: string; text: string },
options?: RPCOptions,
): Promise<number[]>;

/**
* Generates embeddings for multiple texts using a specified model.
*
* @param params - The parameters for the embedding
* @param params.modelId - The identifier of the embedding model to use
* @param params.text - The input texts to embed
* @param options - Optional RPC options including per-call profiling
* @throws {QvacErrorBase} When the response type is invalid or when the embedding fails
*/
export async function embed(params: {
modelId: string;
text: string[];
}): Promise<number[][]>;
export async function embed(
params: { modelId: string; text: string[] },
options?: RPCOptions,
): Promise<number[][]>;

export async function embed(
params: EmbedParams,
options?: RPCOptions,
): Promise<number[] | number[][]> {
const request: EmbedRequest = {
type: "embed",
...params,
};

const response = await send(request);
const response = await send(request, undefined, options);
if (response.type !== "embed") {
throw new InvalidResponseError("embed");
}
Expand Down
13 changes: 9 additions & 4 deletions packages/sdk/client/api/invoke-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ import type {
PluginInvokeRequest,
PluginInvokeStreamRequest,
PluginInvokeStreamResponse,
} from "@/schemas/plugin";
RPCOptions,
} from "@/schemas";
import { InvalidResponseError } from "@/utils/errors-client";

export interface InvokePluginOptions<TParams = unknown> {
Expand All @@ -17,6 +18,7 @@ export interface InvokePluginOptions<TParams = unknown> {
*/
export async function invokePlugin<TResponse = unknown, TParams = unknown>(
options: InvokePluginOptions<TParams>,
rpcOptions?: RPCOptions,
): Promise<TResponse> {
const request: PluginInvokeRequest = {
type: "pluginInvoke",
Expand All @@ -25,7 +27,7 @@ export async function invokePlugin<TResponse = unknown, TParams = unknown>(
params: options.params,
};

const response = await send(request);
const response = await send(request, undefined, rpcOptions);

if (response.type !== "pluginInvoke") {
throw new InvalidResponseError("pluginInvoke");
Expand All @@ -40,15 +42,18 @@ export async function invokePlugin<TResponse = unknown, TParams = unknown>(
export async function* invokePluginStream<
TResponse = unknown,
TParams = unknown,
>(options: InvokePluginOptions<TParams>): AsyncGenerator<TResponse> {
>(
options: InvokePluginOptions<TParams>,
rpcOptions?: RPCOptions,
): AsyncGenerator<TResponse> {
const request: PluginInvokeStreamRequest = {
type: "pluginInvokeStream",
modelId: options.modelId,
handler: options.handler,
params: options.params,
};

for await (const chunk of stream(request)) {
for await (const chunk of stream(request, undefined, rpcOptions)) {
const response = chunk as PluginInvokeStreamResponse;
if (response.type !== "pluginInvokeStream") {
throw new InvalidResponseError("pluginInvokeStream");
Expand Down
Loading
Loading