Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions packages/sdk/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,38 @@
# Changelog

## [0.9.0]

### Breaking Changes

#### `embed()` now returns `{ embedding, stats }` instead of raw vectors

The `embed()` client function now returns `{ embedding, stats? }` instead of `number[] | number[][]`, matching the pattern used by `completionStream()`, `diffusion()`, `translate()`, and `ocr()`.

**Before:**

```typescript
const vectors = await embed({ modelId, text: "hello" });
```

**After:**

```typescript
const { embedding, stats } = await embed({ modelId, text: "hello" });
```

### Features

- **`backendDevice` stat** — `completionStatsSchema` and `embedStatsSchema` now include an optional `backendDevice: "cpu" | "gpu"` field, reporting which compute backend the addon used for inference.
- **New config fields** — `openclCacheDir` (string), `cache-type-k` (string), and `cache-type-v` (string) added to `llmConfigBaseSchema`. `openclCacheDir` also added to `embedConfigBaseSchema`. These mirror the inputs introduced by `@qvac/llm-llamacpp@0.14.4` and `@qvac/embed-llamacpp@0.13.4`.
- **`EmbedStats` type** exported from SDK root for typed consumption of embed runtime stats.

### Dependency Changes

- `@qvac/embed-llamacpp`: `^0.12.0` → `^0.13.4`
- `@qvac/llm-llamacpp`: `^0.12.1` → `^0.14.4`

---

Comment thread
opaninakuffo marked this conversation as resolved.
Outdated
## [0.8.3]

📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.8.3
Expand Down
8 changes: 4 additions & 4 deletions packages/sdk/bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 10 additions & 4 deletions packages/sdk/client/api/embed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ import { send } from "@/client/rpc/rpc-client";
import {
type EmbedParams,
type EmbedRequest,
type EmbedStats,
type RPCOptions,
} from "@/schemas";
import { InvalidResponseError } from "@/utils/errors-client";

export type { EmbedStats };
Comment thread
opaninakuffo marked this conversation as resolved.
Outdated

/**
* Generates embeddings for a single text using a specified model.
*
Expand All @@ -18,7 +21,7 @@ import { InvalidResponseError } from "@/utils/errors-client";
export async function embed(
params: { modelId: string; text: string },
options?: RPCOptions,
): Promise<number[]>;
): Promise<{ embedding: number[]; stats?: EmbedStats }>;

/**
* Generates embeddings for multiple texts using a specified model.
Expand All @@ -32,12 +35,12 @@ export async function embed(
export async function embed(
params: { modelId: string; text: string[] },
options?: RPCOptions,
): Promise<number[][]>;
): Promise<{ embedding: number[][]; stats?: EmbedStats }>;

export async function embed(
params: EmbedParams,
options?: RPCOptions,
): Promise<number[] | number[][]> {
): Promise<{ embedding: number[] | number[][]; stats?: EmbedStats }> {
const request: EmbedRequest = {
type: "embed",
...params,
Expand All @@ -48,5 +51,8 @@ export async function embed(
throw new InvalidResponseError("embed");
}

return response.embedding;
return {
embedding: response.embedding,
...(response.stats !== undefined && { stats: response.stats }),
};
}
2 changes: 1 addition & 1 deletion packages/sdk/client/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export { unloadModel } from "./unload-model";
export { loggingStream } from "./logging-stream";
export { heartbeat } from "./heartbeat";
export { transcribe, transcribeStream } from "./transcribe";
export { embed } from "./embed";
export { embed, type EmbedStats } from "./embed";
Comment thread
opaninakuffo marked this conversation as resolved.
Outdated
export { finetune, type FinetuneHandle } from "./finetune";
export { translate } from "./translate";
export { cancel } from "./cancel";
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/client/api/rag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ export async function ragIngest(
* ```typescript
* // Segregated flow
* const chunks = await ragChunk({ documents: ["text1", "text2"] });
* const embeddings = await embed({ modelId, text: chunks.map(c => c.content) });
* const { embedding: embeddings } = await embed({ modelId, text: chunks.map(c => c.content) });
* const embeddedDocs = chunks.map((chunk, i) => ({
* ...chunk,
* embedding: embeddings[i],
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/examples/embed-p2p.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ try {
console.log("\nπŸ“ Example 1: Single Text Embedding");
console.log("=".repeat(50));

const singleEmbedding = await embed({ modelId, text: "Hello, world!" });
const { embedding: singleEmbedding } = await embed({ modelId, text: "Hello, world!" });

console.log("Input: 'Hello, world!'");
console.log("Embedding dimensions:", singleEmbedding.length);
Expand All @@ -39,7 +39,7 @@ try {
"Python is a programming language",
];

const batchEmbeddings = await embed({ modelId, text: texts });
const { embedding: batchEmbeddings } = await embed({ modelId, text: texts });

console.log("Input: Array of", texts.length, "texts");
console.log("Output: Array of", batchEmbeddings.length, "embeddings");
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/examples/logging-streaming.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ try {
history: messages,
stream: true,
});
const embedding = await embed({
const { embedding } = await embed({
modelId: embedModelId,
text: messages[0]?.content ?? "Hello, world!",
});
Expand Down
6 changes: 3 additions & 3 deletions packages/sdk/examples/profiling/per-call.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ try {
console.log("Model loaded:", modelId);

console.log("\n=== Embed with per-call profiling ===");
const embedding1 = await embed(
const { embedding: embedding1 } = await embed(
{ modelId, text: "Profile this specific call" },
{ profiling: { enabled: true, includeServerBreakdown: true } },
);
console.log("Embedding dimensions:", embedding1.length);

console.log("\n=== Embed without profiling ===");
const embedding2 = await embed({
const { embedding: embedding2 } = await embed({
modelId,
text: "This call is not profiled",
});
console.log("Embedding dimensions:", embedding2.length);

console.log("\n=== Embed with profiling explicitly disabled ===");
const embedding3 = await embed(
const { embedding: embedding3 } = await embed(
{ modelId, text: "Profiling explicitly disabled for this call" },
{ profiling: { enabled: false } },
);
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/examples/rag/rag-chromadb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ try {
for (const s of samples) {
ids.push(String(s.id));
documents.push(s.text);
embeddings.push(await embed({ modelId, text: s.text }));
embeddings.push((await embed({ modelId, text: s.text })).embedding);
}

await collection.add({
Expand All @@ -115,7 +115,7 @@ try {
});

console.log("πŸ”Ž Searching for similar documents...");
const queryEmbedding = await embed({ modelId, text: query });
const { embedding: queryEmbedding } = await embed({ modelId, text: query });

// Query top 3 by vector similarity and include distances
const res = await collection.query({
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/examples/rag/rag-hyperdb/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ try {

console.log("\n🧠 Step 2: Generating embeddings (batch)...");
const texts = chunks.map((chunk) => chunk.content);
const embeddings = await embed({ modelId, text: texts });
const { embedding: embeddings } = await embed({ modelId, text: texts });

const embeddedDocs = chunks.map((chunk, i) => ({
id: chunk.id,
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/examples/rag/rag-lancedb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ try {
console.log("πŸ“š Embedding documents...");
const documents = [];
for (const sample of samples) {
const embedding = await embed({ modelId, text: sample.text });
const { embedding } = await embed({ modelId, text: sample.text });
const record = {
id: sample.id,
text: sample.text,
Expand All @@ -85,7 +85,7 @@ try {
await documentsTable.add(documents);

console.log("πŸ”Ž Searching for similar documents...");
const queryEmbedding = await embed({ modelId, text: query });
const { embedding: queryEmbedding } = await embed({ modelId, text: query });
const results = (await documentsTable
.vectorSearch(queryEmbedding)
.limit(1)
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/examples/rag/rag-sqlite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ try {

console.log("πŸ“š Embedding documents...");
for (const sample of samples) {
const embedding = await embed({ modelId, text: sample.text });
const { embedding } = await embed({ modelId, text: sample.text });
db.exec({
sql: "INSERT INTO documents VALUES (?, ?, vector_as_f32(?))",
bind: [sample.id, sample.text, JSON.stringify(embedding)],
Expand All @@ -84,7 +84,7 @@ try {

// Search for similar documents
console.log("πŸ”Ž Searching for similar documents...");
const queryEmbedding = await embed({ modelId, text: query });
const { embedding: queryEmbedding } = await embed({ modelId, text: query });

const results: Array<{
id: number;
Expand Down
1 change: 1 addition & 0 deletions packages/sdk/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export {
transcribe,
transcribeStream,
embed,
type EmbedStats,
finetune,
translate,
cancel,
Expand Down
6 changes: 3 additions & 3 deletions packages/sdk/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@qvac/sdk",
"version": "0.8.3",
"version": "0.9.0",
Comment thread
opaninakuffo marked this conversation as resolved.
Outdated
"license": "Apache-2.0",
"repository": {
"type": "git",
Expand Down Expand Up @@ -175,10 +175,10 @@
"@qvac/decoder-audio": "^0.3.3",
"@qvac/diffusion-cpp": "0.1.1",
"@qvac/dl-filesystem": "^0.2.0",
"@qvac/embed-llamacpp": "^0.13.1",
"@qvac/embed-llamacpp": "^0.13.4",
"@qvac/error": "^0.1.1",
"@qvac/langdetect-text": "^0.1.0",
"@qvac/llm-llamacpp": "^0.14.0",
"@qvac/llm-llamacpp": "^0.14.4",
"@qvac/logging": "^0.1.0",
"@qvac/ocr-onnx": "^0.4.0",
"@qvac/rag": "^0.4.4",
Expand Down
1 change: 1 addition & 0 deletions packages/sdk/schemas/completion-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export const completionStatsSchema = z.object({
timeToFirstToken: z.number().optional(),
tokensPerSecond: z.number().optional(),
cacheTokens: z.number().optional(),
backendDevice: z.enum(["cpu", "gpu"]).optional(),
});

export const completionStreamResponseSchema = z.object({
Expand Down
1 change: 1 addition & 0 deletions packages/sdk/schemas/embed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const embedStatsSchema = z.object({
totalTime: z.number().optional(),
tokensPerSecond: z.number().optional(),
totalTokens: z.number().optional(),
backendDevice: z.enum(["cpu", "gpu"]).optional(),
});

export const embedResponseSchema = z.object({
Expand Down
12 changes: 12 additions & 0 deletions packages/sdk/schemas/llamacpp-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ export const llmConfigBaseSchema = z.object({
stop_sequences: z.array(z.string()).optional(),
n_discarded: z.number().optional(),
tools: z.boolean().optional(),
"cache-type-k": z.string().optional(),
"cache-type-v": z.string().optional(),
/**
* Writable directory for OpenCL kernel binary cache. Required on Android
* for fast GPU startup.
*/
openclCacheDir: z.string().optional(),
projectionModelSrc: modelSrcInputSchema.optional(),
});

Expand Down Expand Up @@ -75,6 +82,11 @@ export const embedConfigBaseSchema = z.object({
.union([z.number().int().min(0), z.enum(["integrated", "dedicated"])])
.optional(),
verbosity: verbositySchema.optional(),
/**
* Writable directory for OpenCL kernel binary cache. Required on Android
* for fast GPU startup.
*/
openclCacheDir: z.string().optional(),
});

export type EmbedConfigInput = z.infer<typeof embedConfigBaseSchema>;
Expand Down
1 change: 1 addition & 0 deletions packages/sdk/server/bare/ops/embed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export async function embed(params: EmbedParams): Promise<EmbedResult> {
...(response.stats?.total_time_ms !== undefined && { totalTime: response.stats.total_time_ms }),
...(response.stats?.tokens_per_second !== undefined && { tokensPerSecond: response.stats.tokens_per_second }),
...(response.stats?.total_tokens !== undefined && { totalTokens: response.stats.total_tokens }),
...(response.stats?.backendDevice !== undefined && { backendDevice: response.stats.backendDevice }),
};

const embeddingsArray = rawEmbeddings[0];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ async function* processModelResponse(
...(responseWithStats.stats?.CacheTokens !== undefined && {
cacheTokens: responseWithStats.stats.CacheTokens,
}),
...(responseWithStats.stats?.backendDevice !== undefined && {
backendDevice: responseWithStats.stats.backendDevice,
}),
};

return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ function transformLlmConfig(llmConfig: LlmConfig) {
delete transformed["stop_sequences"];
}

if ("opencl_cache_dir" in transformed) {
transformed["openclCacheDir"] = transformed["opencl_cache_dir"];
delete transformed["opencl_cache_dir"];
}

return transformed;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ function transformEmbedConfig(embedConfig: EmbedConfig): GGMLConfig {
config.verbosity = `${embedConfig.verbosity}`;
}

if (embedConfig.openclCacheDir) {
config.openclCacheDir = embedConfig.openclCacheDir;
}

return config;
}

Expand Down
2 changes: 2 additions & 0 deletions packages/sdk/server/bare/types/addon-responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ export interface LlmStats {
TPS?: number;
CacheTokens?: number;
generatedTokens?: number;
backendDevice?: "cpu" | "gpu";
}

export interface LlmResponse {
Expand Down Expand Up @@ -38,6 +39,7 @@ export interface EmbedStats {
total_time_ms?: number;
tokens_per_second?: number;
total_tokens?: number;
backendDevice?: "cpu" | "gpu";
}

export interface EmbedResponse {
Expand Down
Loading
Loading