diff --git a/docs/website/content/docs/(latest)/sdk/api/close.mdx b/docs/website/content/docs/(latest)/sdk/api/close.mdx
index a6f0912f15..3cc1240816 100644
--- a/docs/website/content/docs/(latest)/sdk/api/close.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/close.mdx
@@ -10,13 +10,9 @@ function close(): Promise<void>;
 
 Safe to call multiple times — subsequent calls are a no-op if already closed.
 
-## Bare direct runtime
-
-When the SDK runs in-process on Bare (not under Node.js with a separate worker process), calling `close()` runs the same teardown as a signal-driven shutdown and then **ends the process with exit code 0**. The returned promise may not settle in that case because the process exits immediately afterward. This matches the behavior users expect after the last model is unloaded (the SDK calls `close()` automatically when nothing remains loaded).
-
 ## Returns
 
-`Promise<void>` — Resolves when the connection is closed (Node.js and Expo). On Bare direct mode, the process usually exits before the promise resolves.
+`Promise<void>` — Resolves when the connection is closed.
 
 ## Example
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/completion.mdx b/docs/website/content/docs/(latest)/sdk/api/completion.mdx
index 6236d79ae2..103db9cdc0 100644
--- a/docs/website/content/docs/(latest)/sdk/api/completion.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/completion.mdx
@@ -140,6 +140,7 @@ Optional sampling and generation parameters (strict — no extra keys allowed):
 | timeToFirstToken | `number` | Time to first token in milliseconds |
 | tokensPerSecond | `number` | Tokens generated per second |
 | cacheTokens | `number` | Number of cached tokens |
+| backendDevice | `"cpu" \| "gpu" \| undefined` | Compute backend used for inference |
 
 ### `ToolCallEvent`
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/defineDuplexHandler.mdx b/docs/website/content/docs/(latest)/sdk/api/defineDuplexHandler.mdx
new file mode 100644
index 0000000000..4e3522eada
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/defineDuplexHandler.mdx
@@ -0,0 +1,31 @@
+---
+title: "defineDuplexHandler( )"
+titleStyle: code
+description: Helper function to define a duplex (bidirectional streaming) handler with full type inference.
+---
+
+```ts
+function defineDuplexHandler<TRequest extends ZodType, TResponse extends ZodType>(
+  definition: DuplexPluginHandlerDefinition<TRequest, TResponse>
+): PluginHandlerDefinition<TRequest, TResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| definition | [`DuplexPluginHandlerDefinition`](#duplexpluginhandlerdefinition) | ✓ | The duplex handler definition with schemas and handler function |
+
+### `DuplexPluginHandlerDefinition`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| requestSchema | `ZodType` | ✓ | Zod schema for validating incoming requests |
+| responseSchema | `ZodType` | ✓ | Zod schema for validating outgoing responses |
+| streaming | `true` | ✓ | Must be `true` — duplex handlers are always streaming |
+| duplex | `true` | ✓ | Must be `true` — marks this handler as bidirectional |
+| handler | `(request, inputStream: AsyncIterable<Buffer>) => AsyncGenerator<response>` | ✓ | The handler function — receives a validated request and an input stream, yields validated response chunks |
+
+## Returns
+
+`PluginHandlerDefinition<TRequest, TResponse>` — The same definition object, with full type inference applied. This is an identity function used for type checking.
diff --git a/docs/website/content/docs/(latest)/sdk/api/diffusion.mdx b/docs/website/content/docs/(latest)/sdk/api/diffusion.mdx
new file mode 100644
index 0000000000..256212e23a
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/diffusion.mdx
@@ -0,0 +1,107 @@
+---
+title: "diffusion( )"
+titleStyle: code
+description: Generates images using a loaded diffusion model.
+---
+
+```ts
+function diffusion(params: DiffusionClientParams): {
+  progressStream: AsyncGenerator<DiffusionProgressTick>;
+  outputs: Promise<Uint8Array[]>;
+  stats: Promise<DiffusionStats | undefined>;
+};
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`DiffusionClientParams`](#diffusionclientparams) | ✓ | The diffusion parameters |
+
+### `DiffusionClientParams`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the loaded diffusion model |
+| prompt | `string` | ✓ | Text prompt describing the image to generate |
+| negative_prompt | `string` | ✗ | Text describing what to avoid in the generated image |
+| width | `number` | ✗ | Image width in pixels (must be a multiple of 8) |
+| height | `number` | ✗ | Image height in pixels (must be a multiple of 8) |
+| steps | `number` | ✗ | Number of diffusion steps |
+| cfg_scale | `number` | ✗ | Classifier-free guidance scale for SD 1.x / 2.x / XL / SD3 models (typical range 1–20, default 7) |
+| guidance | `number` | ✗ | Distilled guidance for FLUX models (typical range 1–10, default 3.5) |
+| sampling_method | [`SamplingMethod`](#samplingmethod) | ✗ | Sampling algorithm |
+| scheduler | [`Scheduler`](#scheduler) | ✗ | Noise scheduler |
+| seed | `number` | ✗ | Random seed for reproducibility |
+| batch_count | `number` | ✗ | Number of images to generate |
+| vae_tiling | `boolean` | ✗ | Enable VAE tiling for large images on limited VRAM |
+| cache_preset | `string` | ✗ | Cache preset identifier |
+
+#### `SamplingMethod`
+
+`"euler" | "euler_a" | "heun" | "dpm2" | "dpm++2m" | "dpm++2mv2" | "dpm++2s_a" | "lcm" | "ipndm" | "ipndm_v" | "ddim_trailing" | "tcd" | "res_multistep" | "res_2s"`
+
+#### `Scheduler`
+
+`"discrete" | "karras" | "exponential" | "ays" | "gits" | "sgm_uniform" | "simple" | "lcm" | "smoothstep" | "kl_optimal" | "bong_tangent"`
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| progressStream | `AsyncGenerator<`[`DiffusionProgressTick`](#diffusionprogresstick)`>` | Stream of generation progress ticks |
+| outputs | `Promise<Uint8Array[]>` | Generated image buffers (resolves when generation completes) |
+| stats | `Promise<`[`DiffusionStats`](#diffusionstats) `\| undefined>` | Performance statistics |
+
+### `DiffusionProgressTick`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| step | `number` | Current diffusion step |
+| totalSteps | `number` | Total number of steps |
+| elapsedMs | `number` | Elapsed time in milliseconds |
+
+### `DiffusionStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| modelLoadMs | `number \| undefined` | Model loading time in milliseconds |
+| generationMs | `number \| undefined` | Single generation time in milliseconds |
+| totalGenerationMs | `number \| undefined` | Total generation time in milliseconds |
+| totalWallMs | `number \| undefined` | Total wall-clock time in milliseconds |
+| totalSteps | `number \| undefined` | Total diffusion steps performed |
+| totalGenerations | `number \| undefined` | Number of generations completed |
+| totalImages | `number \| undefined` | Number of images produced |
+| totalPixels | `number \| undefined` | Total pixels generated |
+| width | `number \| undefined` | Output image width |
+| height | `number \| undefined` | Output image height |
+| seed | `number \| undefined` | Seed used for generation |
+
+## Example
+
+```typescript
+import fs from "fs";
+
+// Basic usage
+const { outputs, stats } = diffusion({ modelId, prompt: "a cat" });
+const buffers = await outputs;
+fs.writeFileSync("output.png", buffers[0]);
+
+// With progress tracking
+const { progressStream, outputs: images } = diffusion({
+  modelId,
+  prompt: "a cat sitting on a windowsill",
+  width: 512,
+  height: 512,
+  steps: 20,
+  cfg_scale: 7,
+});
+
+for await (const { step, totalSteps } of progressStream) {
+  console.log(`${step}/${totalSteps}`);
+}
+
+const imageBuffers = await images;
+```
diff --git a/docs/website/content/docs/(latest)/sdk/api/embed.mdx b/docs/website/content/docs/(latest)/sdk/api/embed.mdx
index 1675acb06c..7eb4d29f2c 100644
--- a/docs/website/content/docs/(latest)/sdk/api/embed.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/embed.mdx
@@ -5,8 +5,8 @@ description: Generates embeddings for a single text using a specified model.
 ---
 
 ```ts
-function embed(params: { modelId: string; text: string }, options?: RPCOptions): Promise<number[]>;
-function embed(params: { modelId: string; text: string[] }, options?: RPCOptions): Promise<number[][]>;
+function embed(params: { modelId: string; text: string }, options?: RPCOptions): Promise<{ embedding: number[]; stats?: EmbedStats }>;
+function embed(params: { modelId: string; text: string[] }, options?: RPCOptions): Promise<{ embedding: number[][]; stats?: EmbedStats }>;
 ```
 
 ## Parameters
@@ -14,7 +14,7 @@ function embed(params: { modelId: string; text: string[] }, options?: RPCOptions
 | Name | Type | Required? | Description |
 | --- | --- | :---: | --- |
 | params | [`EmbedParams`](#embedparams) | ✓ | The embedding parameters |
-| options | [`RPCOptions`](../shared-types/#rpcoptions) | ✗ | Optional RPC transport options |
+| options | [`RPCOptions`](./shared-types#rpcoptions) | ✗ | Optional RPC transport options |
 
 ### `EmbedParams`
 
@@ -25,8 +25,21 @@ function embed(params: { modelId: string; text: string[] }, options?: RPCOptions
 
 ## Returns
 
-- `Promise<number[]>` — When `text` is a single string, returns the embedding vector.
-- `Promise<number[][]>` — When `text` is an array, returns an array of embedding vectors.
+`Promise<object>` — Resolves to an object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| embedding | `number[] \| number[][]` | The embedding vector(s): a single `number[]` when `text` is a string, or a `number[][]` (one vector per input) when `text` is an array. |
+| stats | [`EmbedStats`](#embedstats) ` \| undefined` | Performance statistics |
+
+### `EmbedStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| totalTime | `number \| undefined` | Total embedding time in milliseconds |
+| tokensPerSecond | `number \| undefined` | Tokens processed per second |
+| totalTokens | `number \| undefined` | Total tokens processed |
+| backendDevice | `"cpu" \| "gpu" \| undefined` | Compute backend used for inference |
 
 ## Throws
 
@@ -38,13 +51,17 @@ function embed(params: { modelId: string; text: string[] }, options?: RPCOptions
 
 ```typescript
 // Single text
-const vector = await embed({ modelId: "embedding-model", text: "Hello world" });
-console.log(vector.length); // e.g. 384
+const { embedding, stats } = await embed({
+  modelId: "embedding-model",
+  text: "Hello world",
+});
+console.log(embedding.length); // e.g. 384
+console.log(stats?.tokensPerSecond);
 
 // Multiple texts (batch)
-const vectors = await embed({
+const { embedding: vectors } = await embed({
   modelId: "embedding-model",
-  text: ["Hello world", "How are you?"]
+  text: ["Hello world", "How are you?"],
 });
 console.log(vectors.length); // 2
 ```
diff --git a/docs/website/content/docs/(latest)/sdk/api/errors.mdx b/docs/website/content/docs/(latest)/sdk/api/errors.mdx
index 11efffe2ca..273f237ff3 100644
--- a/docs/website/content/docs/(latest)/sdk/api/errors.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/errors.mdx
@@ -23,9 +23,9 @@ Thrown on the client side (response validation, RPC, provider). Access via `SDK_
 
 | Error | Code | Summary | Thrown by |
 | --- | --- | --- | --- |
-| `INVALID_RESPONSE_TYPE` | 50001 | Invalid response type received. | `cancel()`, `downloadAsset()`, `embed()`, `getModelInfo()`, `loadModel()`, `loggingStream()`, `ping()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()`, `startQVACProvider()`, `stopQVACProvider()`, `unloadModel()` |
+| `INVALID_RESPONSE_TYPE` | 50001 | Invalid response type received. | `cancel()`, `downloadAsset()`, `embed()`, `finetune()`, `getModelInfo()`, `heartbeat()`, `loadModel()`, `loggingStream()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()`, `resume()`, `startQVACProvider()`, `stopQVACProvider()`, `suspend()`, `unloadModel()` |
 | `INVALID_OPERATION_IN_RESPONSE` | 50002 | Response operation didn't match the expected RAG operation. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
-| `STREAM_ENDED_WITHOUT_RESPONSE` | 50003 | Streaming RPC ended without a final response. | `downloadAsset()`, `loadModel()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | 50003 | Streaming RPC ended without a final response. | `downloadAsset()`, `finetune()`, `loadModel()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
 | `INVALID_AUDIO_CHUNK_TYPE` | 50004 | Invalid audio chunk input type provided. | `transcribe()`, `transcribeStream()` |
 | `INVALID_TOOLS_ARRAY` | 50005 | Invalid tools array provided. | `completion()` |
 | `INVALID_TOOL_SCHEMA` | 50006 | Invalid tool schema provided. | `completion()` |
@@ -126,6 +126,8 @@ Thrown by the server (model operations, downloads, cache, RAG). Access via `SDK_
 | `DELEGATE_PROVIDER_ERROR` | 53702 | Delegated provider returned an error. | `completion()`, `loadModel()` |
 | `RPC_NO_DATA_RECEIVED` | 53703 | No data received from request. | Internal server RPC |
 | `RPC_UNKNOWN_REQUEST_TYPE` | 53704 | Unknown request type received. | Internal server RPC |
+| `LIFECYCLE_SUSPEND_FAILED` | 53600 | Failed to suspend one or more resources. | `suspend()` |
+| `LIFECYCLE_RESUME_FAILED` | 53601 | Failed to resume one or more resources. | `resume()` |
 | `PLUGIN_NOT_FOUND` | 53850 | Plugin not found for the specified model type. | `invokePlugin()`, `invokePluginStream()`, `loadModel()` |
 | `PLUGIN_HANDLER_NOT_FOUND` | 53851 | Handler not found in plugin. | `invokePlugin()`, `invokePluginStream()` |
 | `PLUGIN_REQUEST_VALIDATION_FAILED` | 53852 | Plugin request validation failed. | `invokePlugin()`, `invokePluginStream()` |
diff --git a/docs/website/content/docs/(latest)/sdk/api/finetune.mdx b/docs/website/content/docs/(latest)/sdk/api/finetune.mdx
new file mode 100644
index 0000000000..43c8e21271
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/finetune.mdx
@@ -0,0 +1,189 @@
+---
+title: "finetune( )"
+titleStyle: code
+description: Starts, resumes, inspects, pauses, or cancels a finetuning job for a loaded model.
+---
+
+```ts
+function finetune(params: FinetuneRunParams, rpcOptions?: RPCOptions): FinetuneHandle;
+function finetune(params: FinetuneStopParams | FinetuneGetStateParams, rpcOptions?: RPCOptions): Promise<FinetuneResult>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`FinetuneRunParams`](#finetunerunparams) \| [`FinetuneStopParams`](#finetunestopparams) \| [`FinetuneGetStateParams`](#finetunegetstateparams) | ✓ | The finetuning parameters — shape determines the overload |
+| rpcOptions | [`RPCOptions`](./shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `FinetuneRunParams`
+
+Used to start or resume a finetuning job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the loaded model to finetune |
+| operation | `"start" \| "resume"` | ✗ | Omit to let the add-on automatically choose whether to start fresh or resume |
+| options | [`FinetuneOptions`](#finetuneoptions) | ✓ | Finetuning configuration |
+
+### `FinetuneStopParams`
+
+Used to pause or cancel a running job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the model |
+| operation | `"pause" \| "cancel"` | ✓ | The stop operation |
+
+### `FinetuneGetStateParams`
+
+Used to inspect the current state of a finetuning job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the model |
+| operation | `"getState"` | ✓ | Must be `"getState"` |
+| options | [`FinetuneOptions`](#finetuneoptions) | ✓ | Finetuning configuration |
+
+### `FinetuneOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| trainDatasetDir | `string` | ✓ | Directory containing the training dataset |
+| validation | [`FinetuneValidation`](#finetunevalidation) | ✓ | Validation configuration |
+| outputParametersDir | `string` | ✓ | Directory where output adapter parameters are written |
+| numberOfEpochs | `number` | ✗ | Number of epochs to run |
+| learningRate | `number` | ✗ | Learning rate override |
+| contextLength | `number` | ✗ | Context length override |
+| batchSize | `number` | ✗ | Batch size override |
+| microBatchSize | `number` | ✗ | Micro batch size override |
+| assistantLossOnly | `boolean` | ✗ | Compute loss only on assistant tokens |
+| loraRank | `number` | ✗ | LoRA rank override |
+| loraAlpha | `number` | ✗ | LoRA alpha override |
+| loraInitStd | `number` | ✗ | LoRA initialization standard deviation |
+| loraSeed | `number` | ✗ | LoRA initialization seed |
+| loraModules | `string` | ✗ | Comma-separated LoRA module selection |
+| checkpointSaveDir | `string` | ✗ | Directory for checkpoint snapshots |
+| checkpointSaveSteps | `number` | ✗ | Checkpoint save interval (in steps) |
+| chatTemplatePath | `string` | ✗ | Custom chat template path |
+| lrScheduler | `"constant" \| "cosine" \| "linear"` | ✗ | Learning rate scheduler |
+| lrMin | `number` | ✗ | Minimum learning rate |
+| warmupRatio | `number` | ✗ | Warmup ratio (0–1) |
+| warmupRatioSet | `boolean` | ✗ | Enable warmup ratio |
+| warmupSteps | `number` | ✗ | Warmup step count |
+| warmupStepsSet | `boolean` | ✗ | Enable explicit warmup steps |
+| weightDecay | `number` | ✗ | Weight decay override |
+
+#### `FinetuneValidation`
+
+Discriminated union on `type`:
+
+| Variant | Fields | Description |
+| --- | --- | --- |
+| `{ type: "none" }` | — | No validation |
+| `{ type: "split", fraction?: number }` | `fraction` defaults to `0.05` | Split training data for validation |
+| `{ type: "dataset", path: string }` | — | Use a separate validation dataset |
+
+## Returns
+
+The return type depends on the `operation`:
+
+**Run overload** (operation omitted, `"start"`, or `"resume"`):
+
+`FinetuneHandle` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| progressStream | `AsyncGenerator<`[`FinetuneProgress`](#finetuneprogress)`>` | Stream of training progress ticks |
+| result | `Promise<`[`FinetuneResult`](#finetuneresult)`>` | Resolves when the job finishes |
+
+**Reply overload** (`"pause"`, `"cancel"`, or `"getState"`):
+
+`Promise<FinetuneResult>`
+
+### `FinetuneProgress`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| is_train | `boolean` | Whether this tick is from the training phase (vs validation) |
+| loss | `number \| null` | Current loss value |
+| loss_uncertainty | `number \| null` | Loss uncertainty |
+| accuracy | `number \| null` | Current accuracy |
+| accuracy_uncertainty | `number \| null` | Accuracy uncertainty |
+| global_steps | `number` | Total steps completed |
+| current_epoch | `number` | Current epoch index |
+| current_batch | `number` | Current batch index |
+| total_batches | `number` | Total batches in the epoch |
+| elapsed_ms | `number` | Elapsed time in milliseconds |
+| eta_ms | `number` | Estimated time remaining in milliseconds |
+
+### `FinetuneResult`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"finetune"` | Response type discriminator |
+| status | [`FinetuneStatus`](#finetunestatus) | Current job status |
+| stats | [`FinetuneStats`](#finetunestats) ` \| undefined` | Final training statistics (present when completed) |
+
+### `FinetuneStatus`
+
+`"IDLE" | "RUNNING" | "PAUSED" | "CANCELLED" | "COMPLETED"`
+
+### `FinetuneStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| train_loss | `number \| undefined` | Final training loss |
+| train_loss_uncertainty | `number \| null \| undefined` | Training loss uncertainty |
+| val_loss | `number \| undefined` | Final validation loss |
+| val_loss_uncertainty | `number \| null \| undefined` | Validation loss uncertainty |
+| train_accuracy | `number \| undefined` | Final training accuracy |
+| train_accuracy_uncertainty | `number \| null \| undefined` | Training accuracy uncertainty |
+| val_accuracy | `number \| undefined` | Final validation accuracy |
+| val_accuracy_uncertainty | `number \| null \| undefined` | Validation accuracy uncertainty |
+| learning_rate | `number \| undefined` | Final learning rate |
+| global_steps | `number` | Total steps completed |
+| epochs_completed | `number` | Total epochs completed |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"finetune"` |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Stream ended without receiving the terminal finetune response |
+
+## Example
+
+```typescript
+const handle = finetune({
+  modelId,
+  options: {
+    trainDatasetDir: "./dataset/train",
+    validation: { type: "split", fraction: 0.05 },
+    outputParametersDir: "./artifacts/lora",
+    numberOfEpochs: 2,
+  },
+});
+
+for await (const progress of handle.progressStream) {
+  console.log(progress.global_steps, progress.loss);
+}
+
+console.log(await handle.result);
+
+// Pause a running job
+const pauseResult = await finetune({ modelId, operation: "pause" });
+console.log(pauseResult.status); // "PAUSED"
+
+// Inspect current state
+const state = await finetune({
+  modelId,
+  operation: "getState",
+  options: {
+    trainDatasetDir: "./dataset/train",
+    validation: { type: "none" },
+    outputParametersDir: "./artifacts/lora",
+  },
+});
+console.log(state.status);
+```
diff --git a/docs/website/content/docs/(latest)/sdk/api/getModelByPath.mdx b/docs/website/content/docs/(latest)/sdk/api/getModelByPath.mdx
index c78886dc94..96becb99f6 100644
--- a/docs/website/content/docs/(latest)/sdk/api/getModelByPath.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/getModelByPath.mdx
@@ -16,7 +16,7 @@ function getModelByPath(registryPath: string): RegistryItem | undefined;
 
 ## Returns
 
-[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found. See [`getModelByName()`](../getModelByName#registryitem) for the full `RegistryItem` shape.
+[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found.
 
 ## Example
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/getModelBySrc.mdx b/docs/website/content/docs/(latest)/sdk/api/getModelBySrc.mdx
index 35b5034135..a109cf46c0 100644
--- a/docs/website/content/docs/(latest)/sdk/api/getModelBySrc.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/getModelBySrc.mdx
@@ -17,7 +17,7 @@ function getModelBySrc(modelId: string, blobCoreKey: string): RegistryItem | und
 
 ## Returns
 
-[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found. See [`getModelByName()`](../getModelByName#registryitem) for the full `RegistryItem` shape.
+[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found.
 
 ## Example
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/getModelInfo.mdx b/docs/website/content/docs/(latest)/sdk/api/getModelInfo.mdx
index 2765bf9ea7..3a2b27eaa6 100644
--- a/docs/website/content/docs/(latest)/sdk/api/getModelInfo.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/getModelInfo.mdx
@@ -5,7 +5,7 @@ description: Retrieves detailed information about a model, including cache statu
 ---
 
 ```ts
-function getModelInfo(params): Promise<ModelInfo>;
+function getModelInfo(params: GetModelInfoParams): Promise<ModelInfo>;
 ```
 
 ## Parameters
diff --git a/docs/website/content/docs/(latest)/sdk/api/heartbeat.mdx b/docs/website/content/docs/(latest)/sdk/api/heartbeat.mdx
new file mode 100644
index 0000000000..94c4dca47d
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/heartbeat.mdx
@@ -0,0 +1,61 @@
+---
+title: "heartbeat( )"
+titleStyle: code
+description: Checks if a delegated provider or the local SDK worker is responsive.
+---
+
+```ts
+function heartbeat(params?: { delegate?: DelegateBase }): Promise<HeartbeatResponse>;
+```
+
+Sends a heartbeat round-trip to verify that a delegated provider is online or that the local SDK worker is responsive. When called without arguments, it checks the local worker.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | `object` | ✗ | Optional delegation target |
+| params.delegate | [`DelegateBase`](#delegatebase) | ✗ | The provider to check — omit to check the local worker |
+
+### `DelegateBase`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| topic | `string` | ✓ | Hyperswarm topic hex string |
+| providerPublicKey | `string` | ✓ | Provider peer public key hex string |
+| timeout | `number` | ✗ | Connection timeout in milliseconds (min 100) |
+| healthCheckTimeout | `number` | ✗ | Health check timeout in milliseconds (min 100) |
+
+## Returns
+
+`Promise<HeartbeatResponse>`
+
+### `HeartbeatResponse`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"heartbeat"` | Response type discriminator |
+| number | `number` | Round-trip sequence number |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"heartbeat"` |
+
+## Example
+
+```typescript
+// Check if a delegated provider is online
+try {
+  await heartbeat({
+    delegate: { topic: "topicHex", providerPublicKey: "peerHex", timeout: 3000 },
+  });
+  console.log("Provider is online");
+} catch {
+  console.log("Provider is offline");
+}
+
+// Check if the local SDK worker is responsive
+await heartbeat();
+```
diff --git a/docs/website/content/docs/(latest)/sdk/api/index.mdx b/docs/website/content/docs/(latest)/sdk/api/index.mdx
index 125f6992da..1a6e89d900 100644
--- a/docs/website/content/docs/(latest)/sdk/api/index.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/index.mdx
@@ -1,8 +1,7 @@
 ---
 title: "@qvac/sdk"
 titleStyle: code
-description: API reference — v0.8.0
-ogImage: /og-sdk-js-api-reference.png
+description: API reference — v0.9.0
 ---
 
 ## Overview
@@ -16,16 +15,20 @@ ogImage: /og-sdk-js-api-reference.png
 | [`cancel()`](./cancel) | Cancels an ongoing operation. |
 | [`close()`](./close) | Closes the SDK client connection and releases all associated resources. |
 | [`completion()`](./completion) | Generates completion from a language model based on conversation history. |
+| [`defineDuplexHandler()`](./defineDuplexHandler) | Helper function to define a duplex (bidirectional streaming) handler with full type inference. |
 | [`defineHandler()`](./defineHandler) | Helper function to define a handler with full type inference. |
 | [`definePlugin()`](./definePlugin) | Helper function to define a plugin with full type inference. |
 | [`deleteCache()`](./deleteCache) | Deletes KV cache files. |
+| [`diffusion()`](./diffusion) | Generates images using a loaded diffusion model. |
 | [`downloadAsset()`](./downloadAsset) | Downloads an asset (model file) without loading it into memory. |
 | [`embed()`](./embed) | Generates embeddings for a single text using a specified model. |
+| [`finetune()`](./finetune) | Starts, resumes, inspects, pauses, or cancels a finetuning job for a loaded model. |
 | [`getLogger()`](./getLogger) | Creates or retrieves a namespaced logger instance. |
 | [`getModelByName()`](./getModelByName) | Looks up a model in the built-in catalog by its constant name. |
 | [`getModelByPath()`](./getModelByPath) | Looks up a model in the built-in catalog by its registry path. |
 | [`getModelBySrc()`](./getModelBySrc) | Looks up a model in the built-in catalog by model file ID and blob core key. |
 | [`getModelInfo()`](./getModelInfo) | Returns status information for a catalog model, including cache state and loaded instances. |
+| [`heartbeat()`](./heartbeat) | Checks if a delegated provider or the local SDK worker is responsive. |
 | [`invokePlugin()`](./invokePlugin) | Invoke a non-streaming plugin handler. |
 | [`invokePluginStream()`](./invokePluginStream) | Invoke a streaming plugin handler. |
 | [`loadModel()`](./loadModel) | Loads a machine learning model from a local path, remote URL, or Hyperdrive key. |
@@ -34,7 +37,6 @@ ogImage: /og-sdk-js-api-reference.png
 | [`modelRegistryList()`](./modelRegistryList) | Returns all available models from the QVAC distributed model registry. |
 | [`modelRegistrySearch()`](./modelRegistrySearch) | Searches the model registry with optional filters for model type, engine, and quantization. |
 | [`ocr()`](./ocr) | Performs Optical Character Recognition (OCR) on an image to extract text. |
-| [`ping()`](./ping) | Sends a ping request to the server and returns the pong response. |
 | [`ragChunk()`](./ragChunk) | Chunks documents into smaller pieces for embedding. |
 | [`ragCloseWorkspace()`](./ragCloseWorkspace) | Closes a RAG workspace, releasing in-memory resources (Corestore, HyperDB adapter, RAG instance). |
 | [`ragDeleteEmbeddings()`](./ragDeleteEmbeddings) | Deletes document embeddings from the RAG vector database. |
@@ -44,8 +46,10 @@ ogImage: /og-sdk-js-api-reference.png
 | [`ragReindex()`](./ragReindex) | Reindexes the RAG database to optimize search performance. |
 | [`ragSaveEmbeddings()`](./ragSaveEmbeddings) | Saves pre-embedded documents to the RAG vector database. |
 | [`ragSearch()`](./ragSearch) | Searches for similar documents in the RAG vector database. |
+| [`resume()`](./resume) | Resumes all suspended Hyperswarm and Corestore resources. |
 | [`startQVACProvider()`](./startQVACProvider) | Starts a provider service that offers QVAC capabilities to remote peers. |
 | [`stopQVACProvider()`](./stopQVACProvider) | Stops a running provider service and leaves the specified topic. |
+| [`suspend()`](./suspend) | Suspends all active Hyperswarm and Corestore resources. |
 | [`textToSpeech()`](./textToSpeech) | Converts text to speech audio using a loaded TTS model. |
 | [`transcribe()`](./transcribe) | Collects all streaming results into a single string response. |
 | [`transcribeStream()`](./transcribeStream) | Streams audio transcription results in real-time, yielding text chunks as they become available. |
diff --git a/docs/website/content/docs/(latest)/sdk/api/loadModel.mdx b/docs/website/content/docs/(latest)/sdk/api/loadModel.mdx
index 0d03fce6c5..246dc0bbc9 100644
--- a/docs/website/content/docs/(latest)/sdk/api/loadModel.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/loadModel.mdx
@@ -232,6 +232,7 @@ Discriminated union on `engine`. Common generation parameters (all optional):
 
 Engine-specific:
 
+- **Opus**: _Deprecated in v1.0.0. Use Bergamot for European language pairs._
 - **Bergamot**: `from`/`to` accept 24 languages (en, ar, bg, ca, cs, de, es, et, fi, fr, hu, is, it, ja, ko, lt, lv, nl, pl, pt, ru, sk, sl, uk, zh). Additional fields: `srcVocabSrc`, `dstVocabSrc`, `normalize`, `pivotModel`
 - **IndicTrans**: `from`/`to` accept 26 Indic language codes (e.g., `"eng_Latn"`, `"hin_Deva"`)
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/loggingStream.mdx b/docs/website/content/docs/(latest)/sdk/api/loggingStream.mdx
index dce9af4b4d..ac516288cf 100644
--- a/docs/website/content/docs/(latest)/sdk/api/loggingStream.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/loggingStream.mdx
@@ -5,20 +5,15 @@ description: Opens a logging stream to receive real-time logs.
 ---
 
 ```ts
-function loggingStream(params: { id: string }): AsyncGenerator<LoggingStreamResponse>;
+function loggingStream(params: LoggingParams): AsyncGenerator<LoggingStreamResponse>;
 ```
 
 ## Parameters
 
 | Name | Type | Required? | Description |
 | --- | --- | :---: | --- |
-| params | [`LoggingStreamParams`](#loggingstreamparams) | ✓ | The logging stream parameters |
-
-### `LoggingStreamParams`
-
-| Field | Type | Required? | Description |
-| --- | --- | :---: | --- |
-| id | `string` | ✓ | The identifier to stream logs for. Pass a model ID for model logs, or the exported constant `SDK_LOG_ID` for SDK server logs. |
+| params | `object` | ✓ | The logging stream parameters |
+| params.id | `string` | ✓ | The identifier to stream logs for. Pass a model ID for model logs, or the exported constant `SDK_LOG_ID` for SDK server logs. |
 
 ## Returns
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/modelRegistrySearch.mdx b/docs/website/content/docs/(latest)/sdk/api/modelRegistrySearch.mdx
index 41b7099545..bedf95f26f 100644
--- a/docs/website/content/docs/(latest)/sdk/api/modelRegistrySearch.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/modelRegistrySearch.mdx
@@ -5,7 +5,7 @@ description: Searches the QVAC model registry with optional filters.
 ---
 
 ```ts
-function modelRegistrySearch(params?): Promise<ModelRegistryEntry[]>;
+function modelRegistrySearch(params?: ModelRegistrySearchParams): Promise<ModelRegistryEntry[]>;
 ```
 
 ## Parameters
@@ -26,7 +26,7 @@ function modelRegistrySearch(params?): Promise<ModelRegistryEntry[]>;
 
 ## Returns
 
-`Promise<`[`ModelRegistryEntry[]`](../modelRegistryGetModel#modelregistryentry)`>` — Matching model entries. See [`modelRegistryGetModel()`](../modelRegistryGetModel#modelregistryentry) for the `ModelRegistryEntry` shape.
+`Promise<`[`ModelRegistryEntry[]`](../modelRegistryGetModel#modelregistryentry)`>` — Matching model entries.
 
 ## Throws
 
diff --git a/docs/website/content/docs/(latest)/sdk/api/ping.mdx b/docs/website/content/docs/(latest)/sdk/api/ping.mdx
deleted file mode 100644
index e4722632cd..0000000000
--- a/docs/website/content/docs/(latest)/sdk/api/ping.mdx
+++ /dev/null
@@ -1,19 +0,0 @@
----
-title: "ping( )"
-titleStyle: code
-description: Sends a ping request to the server and returns the pong response.
----
-
-```ts
-function ping(): Promise<{ type: "pong"; number: number }>;
-```
-
-## Returns
-
-`Promise<{ type: "pong", number: number }>` — The server's pong response.
-
-## Throws
-
-| Error | When |
-| --- | --- |
-| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"pong"` |
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragChunk.mdx b/docs/website/content/docs/(latest)/sdk/api/ragChunk.mdx
index 4fba60ad4a..1208272219 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragChunk.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragChunk.mdx
@@ -5,7 +5,7 @@ description: Chunks documents into smaller pieces for embedding.
 ---
 
 ```ts
-function ragChunk(params, options?): Promise<RagDoc[]>;
+function ragChunk(params: RagChunkParams, options?: RPCOptions): Promise<RagDoc[]>;
 ```
 
 Part of the segregated flow: `ragChunk()` → [`embed()`](../embed) → [`ragSaveEmbeddings()`](../ragSaveEmbeddings)
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragCloseWorkspace.mdx b/docs/website/content/docs/(latest)/sdk/api/ragCloseWorkspace.mdx
index 2eb9c471ac..ba4225b0f5 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragCloseWorkspace.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragCloseWorkspace.mdx
@@ -5,7 +5,7 @@ description: Closes a RAG workspace, releasing in-memory resources (Corestore, H
 ---
 
 ```ts
-function ragCloseWorkspace(params?, options?): Promise<void>;
+function ragCloseWorkspace(params?: RagCloseWorkspaceParams, options?: RPCOptions): Promise<void>;
 ```
 
 Releases Corestore, HyperDB adapter, and RAG instance. Workspace data remains on disk unless `deleteOnClose` is set.
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragDeleteEmbeddings.mdx b/docs/website/content/docs/(latest)/sdk/api/ragDeleteEmbeddings.mdx
index 66c9c5667b..ffb86d7b2f 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragDeleteEmbeddings.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragDeleteEmbeddings.mdx
@@ -5,7 +5,7 @@ description: Deletes document embeddings from the RAG vector database.
 ---
 
 ```ts
-function ragDeleteEmbeddings(params, options?): Promise<void>;
+function ragDeleteEmbeddings(params: RagDeleteEmbeddingsParams, options?: RPCOptions): Promise<void>;
 ```
 
 ## Parameters
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragDeleteWorkspace.mdx b/docs/website/content/docs/(latest)/sdk/api/ragDeleteWorkspace.mdx
index c413c01408..9f4dd8b7f7 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragDeleteWorkspace.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragDeleteWorkspace.mdx
@@ -5,7 +5,7 @@ description: Deletes a RAG workspace and all its data.
 ---
 
 ```ts
-function ragDeleteWorkspace(params, options?): Promise<void>;
+function ragDeleteWorkspace(params: RagDeleteWorkspaceParams, options?: RPCOptions): Promise<void>;
 ```
 
 The workspace must not be currently loaded/in-use.
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragIngest.mdx b/docs/website/content/docs/(latest)/sdk/api/ragIngest.mdx
index cfa3475741..5172542f2b 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragIngest.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragIngest.mdx
@@ -5,7 +5,7 @@ description: Ingests documents into the RAG vector database.
 ---
 
 ```ts
-function ragIngest(params, options?): Promise<{ processed: RagSaveEmbeddingsResult[]; droppedIndices: number[] }>;
+function ragIngest(params: RagIngestParams, options?: RPCOptions): Promise<{ processed: RagSaveEmbeddingsResult[]; droppedIndices: number[] }>;
 ```
 
 Full pipeline: chunk → embed → save. Implicitly opens (or creates) the workspace.
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragListWorkspaces.mdx b/docs/website/content/docs/(latest)/sdk/api/ragListWorkspaces.mdx
index a7507e9f91..3b01f7b247 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragListWorkspaces.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragListWorkspaces.mdx
@@ -5,7 +5,7 @@ description: Lists all RAG workspaces with their open status.
 ---
 
 ```ts
-function ragListWorkspaces(options?): Promise<RagWorkspaceInfo[]>;
+function ragListWorkspaces(options?: RPCOptions): Promise<RagWorkspaceInfo[]>;
 ```
 
 ## Parameters
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragReindex.mdx b/docs/website/content/docs/(latest)/sdk/api/ragReindex.mdx
index 89d2502288..670eab2ec1 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragReindex.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragReindex.mdx
@@ -5,7 +5,7 @@ description: Reindexes the RAG database to optimize search performance.
 ---
 
 ```ts
-function ragReindex(params, options?): Promise<RagReindexResult>;
+function ragReindex(params: RagReindexParams, options?: RPCOptions): Promise<RagReindexResult>;
 ```
 
 For HyperDB, rebalances centroids using k-means clustering. Requires a minimum number of documents (16 by default).
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragSaveEmbeddings.mdx b/docs/website/content/docs/(latest)/sdk/api/ragSaveEmbeddings.mdx
index 0988ef573d..6368f231b9 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragSaveEmbeddings.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragSaveEmbeddings.mdx
@@ -5,7 +5,7 @@ description: Saves pre-embedded documents to the RAG vector database.
 ---
 
 ```ts
-function ragSaveEmbeddings(params, options?): Promise<RagSaveEmbeddingsResult[]>;
+function ragSaveEmbeddings(params: RagSaveEmbeddingsParams, options?: RPCOptions): Promise<RagSaveEmbeddingsResult[]>;
 ```
 
 Part of the segregated flow: [`ragChunk()`](../ragChunk) → [`embed()`](../embed) → `ragSaveEmbeddings()`. Implicitly opens (or creates) the workspace.
diff --git a/docs/website/content/docs/(latest)/sdk/api/ragSearch.mdx b/docs/website/content/docs/(latest)/sdk/api/ragSearch.mdx
index 01572aea8c..490bdeb348 100644
--- a/docs/website/content/docs/(latest)/sdk/api/ragSearch.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/ragSearch.mdx
@@ -5,7 +5,7 @@ description: Searches for similar documents in the RAG vector database.
 ---
 
 ```ts
-function ragSearch(params, options?): Promise<RagSearchResult[]>;
+function ragSearch(params: RagSearchParams, options?: RPCOptions): Promise<RagSearchResult[]>;
 ```
 
 ## Parameters
diff --git a/docs/website/content/docs/(latest)/sdk/api/resume.mdx b/docs/website/content/docs/(latest)/sdk/api/resume.mdx
new file mode 100644
index 0000000000..7873a76758
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/resume.mdx
@@ -0,0 +1,34 @@
+---
+title: "resume( )"
+titleStyle: code
+description: Resumes all suspended Hyperswarm and Corestore resources.
+---
+
+```ts
+function resume(): Promise<void>;
+```
+
+Resumes all suspended Hyperswarm and Corestore resources. Idempotent — calling while already active is a no-op. Also serves as the recovery path after a partial suspend failure.
+
+Typically used in mobile apps when the application returns to the foreground, paired with [`suspend()`](../suspend) when it moves to the background.
+
+## Parameters
+
+None.
+
+## Returns
+
+`Promise<void>`
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"resume"` |
+| `LIFECYCLE_RESUME_FAILED` | One or more resources failed to resume |
+
+## Example
+
+```typescript
+await resume();
+```
diff --git a/docs/website/content/docs/(latest)/sdk/api/startQVACProvider.mdx b/docs/website/content/docs/(latest)/sdk/api/startQVACProvider.mdx
index 2561c41ddd..249ef06150 100644
--- a/docs/website/content/docs/(latest)/sdk/api/startQVACProvider.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/startQVACProvider.mdx
@@ -5,7 +5,7 @@ description: Starts a provider service that offers QVAC capabilities to remote p
 ---
 
 ```ts
-function startQVACProvider(params): Promise<object>;
+function startQVACProvider(params: ProvideParams): Promise<object>;
 ```
 
 The provider's keypair can be controlled via the seed option or the `QVAC_HYPERSWARM_SEED` environment variable.
diff --git a/docs/website/content/docs/(latest)/sdk/api/stopQVACProvider.mdx b/docs/website/content/docs/(latest)/sdk/api/stopQVACProvider.mdx
index 71c04ef7f8..efc9188ce6 100644
--- a/docs/website/content/docs/(latest)/sdk/api/stopQVACProvider.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/stopQVACProvider.mdx
@@ -5,7 +5,7 @@ description: Stops a running provider service and leaves the specified topic.
 ---
 
 ```ts
-function stopQVACProvider(params): Promise<object>;
+function stopQVACProvider(params: StopProvideParams): Promise<object>;
 ```
 
 ## Parameters
diff --git a/docs/website/content/docs/(latest)/sdk/api/suspend.mdx b/docs/website/content/docs/(latest)/sdk/api/suspend.mdx
new file mode 100644
index 0000000000..43872419cf
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/api/suspend.mdx
@@ -0,0 +1,34 @@
+---
+title: "suspend( )"
+titleStyle: code
+description: Suspends all active Hyperswarm and Corestore resources.
+---
+
+```ts
+function suspend(): Promise<void>;
+```
+
+Suspends all active Hyperswarm and Corestore resources. Idempotent — calling while already suspended is a no-op.
+
+Typically used in mobile apps when the application moves to the background, paired with [`resume()`](../resume) when it returns to the foreground.
+
+## Parameters
+
+None.
+
+## Returns
+
+`Promise<void>`
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"suspend"` |
+| `LIFECYCLE_SUSPEND_FAILED` | One or more resources failed to suspend (partial failure) |
+
+## Example
+
+```typescript
+await suspend();
+```
diff --git a/docs/website/content/docs/(latest)/sdk/api/textToSpeech.mdx b/docs/website/content/docs/(latest)/sdk/api/textToSpeech.mdx
index 94b468cc24..cfb93b7cdb 100644
--- a/docs/website/content/docs/(latest)/sdk/api/textToSpeech.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/textToSpeech.mdx
@@ -5,7 +5,7 @@ description: Converts text to speech audio.
 ---
 
 ```ts
-function textToSpeech(params, options?): {
+function textToSpeech(params: TtsClientParams, options?: RPCOptions): {
   bufferStream: AsyncGenerator<number>;
   buffer: Promise<number[]>;
   done: Promise<boolean>;
diff --git a/docs/website/content/docs/(latest)/sdk/api/transcribe.mdx b/docs/website/content/docs/(latest)/sdk/api/transcribe.mdx
index bd1bf2f1b5..4cc46b12e0 100644
--- a/docs/website/content/docs/(latest)/sdk/api/transcribe.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/transcribe.mdx
@@ -5,7 +5,7 @@ description: Provides a simple interface for transcribing audio by collecting al
 ---
 
 ```ts
-function transcribe(params, options?): Promise<string>;
+function transcribe(params: TranscribeClientParams, options?: RPCOptions): Promise<string>;
 ```
 
 Collects all streaming results from [`transcribeStream()`](../transcribeStream) into a single string.
diff --git a/docs/website/content/docs/(latest)/sdk/api/transcribeStream.mdx b/docs/website/content/docs/(latest)/sdk/api/transcribeStream.mdx
index a6eaf2ffca..1d4178b3ad 100644
--- a/docs/website/content/docs/(latest)/sdk/api/transcribeStream.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/transcribeStream.mdx
@@ -5,7 +5,7 @@ description: Streams audio transcription results in real-time, yielding text chu
 ---
 
 ```ts
-function transcribeStream(params, options?): AsyncGenerator<string>;
+function transcribeStream(params: TranscribeClientParams, options?: RPCOptions): AsyncGenerator<string>;
 ```
 
 Yields text chunks as they become available from the model.
diff --git a/docs/website/content/docs/(latest)/sdk/api/translate.mdx b/docs/website/content/docs/(latest)/sdk/api/translate.mdx
index 9124fd7ff5..97c61ba238 100644
--- a/docs/website/content/docs/(latest)/sdk/api/translate.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/translate.mdx
@@ -5,7 +5,7 @@ description: Translates text from one language to another using a specified tran
 ---
 
 ```ts
-function translate(params): {
+function translate(params: TranslateClientParams): {
   tokenStream: AsyncGenerator<string>;
   text: Promise<string>;
   stats: Promise<TranslationStats | undefined>;
diff --git a/docs/website/content/docs/(latest)/sdk/api/unloadModel.mdx b/docs/website/content/docs/(latest)/sdk/api/unloadModel.mdx
index c798515f17..4c7dd458b4 100644
--- a/docs/website/content/docs/(latest)/sdk/api/unloadModel.mdx
+++ b/docs/website/content/docs/(latest)/sdk/api/unloadModel.mdx
@@ -5,7 +5,7 @@ description: Unloads a previously loaded model from the server.
 ---
 
 ```ts
-function unloadModel(params): Promise<void>;
+function unloadModel(params: UnloadModelParams): Promise<void>;
 ```
 
 When the last model is unloaded and no providers are active, the RPC connection is automatically closed, allowing the process to exit naturally.
diff --git a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/fine-tuning.mdx b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/fine-tuning.mdx
index aefd169766..52f4b72ef1 100644
--- a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/fine-tuning.mdx
+++ b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/fine-tuning.mdx
@@ -18,7 +18,7 @@ The output is a small `.gguf` adapter file that you can pass to `completion()` v
 
 Use the following sequence of function calls:
 1. [`loadModel()`](/sdk/api/loadModel)
-2. `finetune()`
+2. [`finetune()`](/sdk/api/finetune)
 3. [`unloadModel()`](/sdk/api/unloadModel)
 
 For how to use each function, see [SDK — API reference](/sdk/api/).
diff --git a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
index 410ce01ffd..3560596a1c 100644
--- a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
+++ b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
@@ -13,7 +13,7 @@ Image generation uses [`qvac-ext-stable-diffusion.cpp`](https://github.com/tethe
 
 Use the following sequence of function calls:
 1. [`loadModel()`](/sdk/api/loadModel)
-2. `diffusion()`
+2. [`diffusion()`](/sdk/api/diffusion)
 3. [`unloadModel()`](/sdk/api/unloadModel)
 
 For how to use each function, see [SDK — API reference](/sdk/api/).
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/cancel.mdx b/docs/website/content/docs/v0.9.0/sdk/api/cancel.mdx
new file mode 100644
index 0000000000..b12831d5bd
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/cancel.mdx
@@ -0,0 +1,79 @@
+---
+title: "cancel( )"
+titleStyle: code
+description: Cancels an ongoing operation.
+---
+
+```ts
+function cancel(params: CancelParams): Promise<void>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`CancelParams`](#cancelparams) | ✓ | The parameters for the cancellation |
+
+### `CancelParams`
+
+Discriminated union on `operation`. One of:
+
+#### Cancel inference
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| operation | `"inference"` | ✓ | Operation type |
+| modelId | `string` | ✓ | The model ID to cancel inference for |
+
+#### Cancel download
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| operation | `"downloadAsset"` | ✓ | — | Operation type |
+| downloadKey | `string` | ✓ | — | The download key to cancel |
+| clearCache | `boolean` | ✗ | `false` | If true, deletes the partial download file |
+
+#### Cancel RAG
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| operation | `"rag"` | ✓ | — | Operation type |
+| workspace | `string` | ✗ | `"default"` | The RAG workspace to cancel |
+
+## Returns
+
+`Promise<void>` — Resolves when the operation is cancelled.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"cancel"` |
+| `CANCEL_FAILED` | The server reports cancellation failure |
+
+## Examples
+
+```ts
+// Cancel inference
+await cancel({ operation: "inference", modelId: "model-123" });
+```
+
+```ts
+// Pause download (preserves partial file for automatic resume)
+await cancel({ operation: "downloadAsset", downloadKey: "download-key" });
+```
+
+```ts
+// Cancel download completely (deletes partial file)
+await cancel({ operation: "downloadAsset", downloadKey: "download-key", clearCache: true });
+```
+
+```ts
+// Cancel RAG operation on default workspace
+await cancel({ operation: "rag" });
+```
+
+```ts
+// Cancel RAG operation on specific workspace
+await cancel({ operation: "rag", workspace: "my-workspace" });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/close.mdx b/docs/website/content/docs/v0.9.0/sdk/api/close.mdx
new file mode 100644
index 0000000000..3cc1240816
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/close.mdx
@@ -0,0 +1,35 @@
+---
+title: "close( )"
+titleStyle: code
+description: Closes the SDK client connection and releases all associated resources.
+---
+
+```ts
+function close(): Promise<void>;
+```
+
+Safe to call multiple times — subsequent calls are a no-op if already closed.
+
+## Returns
+
+`Promise<void>` — Resolves when the connection is closed.
+
+## Example
+
+```typescript
+import { loadModel, completion, close } from "@qvac/sdk";
+
+const modelId = await loadModel({
+  modelSrc: "/path/to/model.gguf",
+  modelType: "llm",
+});
+
+const result = completion({
+  modelId,
+  history: [{ role: "user", content: "Hello" }],
+});
+
+console.log(await result.text);
+
+await close();
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/completion.mdx b/docs/website/content/docs/v0.9.0/sdk/api/completion.mdx
new file mode 100644
index 0000000000..103db9cdc0
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/completion.mdx
@@ -0,0 +1,224 @@
+---
+title: "completion( )"
+titleStyle: code
+description: Generates completion from a language model based on conversation history.
+---
+
+```ts
+function completion(params: CompletionParams): {
+  tokenStream: AsyncGenerator<string>;
+  toolCallStream: AsyncGenerator<ToolCallEvent>;
+  text: Promise<string>;
+  toolCalls: Promise<ToolCallWithCall[]>;
+  stats: Promise<CompletionStats | undefined>;
+};
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`CompletionParams`](#completionparams) | ✓ | The completion parameters |
+
+### `CompletionParams`
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| modelId | `string` | ✓ | — | The identifier of the model to use for completion |
+| history | [`HistoryMessage[]`](#historymessage) | ✓ | — | Array of conversation messages |
+| stream | `boolean` | ✗ | `true` | Whether to stream tokens or return complete response |
+| tools | [`Tool[]`](#tool) ` \| ` [`ToolInput[]`](#toolinput) | ✗ | — | Optional array of tools (Zod-schema ToolInput or full Tool objects) |
+| mcp | [`McpClientInput[]`](#mcpclientinput) | ✗ | — | Optional array of MCP client inputs for tool integration |
+| kvCache | `boolean \| string` | ✗ | — | KV cache configuration — see [kvCache](#kvcache) |
+| generationParams | [`GenerationParams`](#generationparams) | ✗ | — | Optional sampling / generation parameters |
+| rpcOptions | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+#### `HistoryMessage`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| role | `string` | ✓ | Message role (e.g., `"user"`, `"assistant"`, `"system"`) |
+| content | `string` | ✓ | Message content |
+| attachments | [`Attachment[]`](#attachment) | ✗ | Optional file attachments for multimodal models |
+
+##### `Attachment`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| path | `string` | ✓ | File path to the attachment |
+
+#### `Tool`
+
+Full tool definition in JSON Schema format (sent to the model):
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| type | `"function"` | ✓ | Always `"function"` |
+| name | `string` | ✓ | Tool name |
+| description | `string` | ✓ | What the tool does |
+| parameters | `object` | ✓ | JSON Schema object describing the tool's input |
+| parameters.type | `"object"` | ✓ | Always `"object"` |
+| parameters.properties | `Record<string, { type, description?, enum? }>` | ✓ | Parameter definitions |
+| parameters.required | `string[]` | ✗ | Required parameter names |
+
+#### `ToolInput`
+
+Simplified tool definition using Zod schemas (auto-converted to `Tool` internally):
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| name | `string` | ✓ | Tool name |
+| description | `string` | ✓ | What the tool does |
+| parameters | `ZodObject` | ✓ | Zod schema describing the tool's input |
+| handler | `(args: Record<string, unknown>) => Promise<unknown>` | ✗ | Handler function — when provided, returned `ToolCallWithCall` objects include an `invoke()` method |
+
+#### `McpClientInput`
+
+MCP (Model Context Protocol) client input for tool integration:
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| client | [`McpClient`](#mcpclient) | ✓ | An MCP client instance |
+| includeResources | `boolean` | ✗ | Whether to include MCP resources |
+
+##### `McpClient`
+
+Duck-typed MCP client interface:
+
+| Method | Signature | Required? | Description |
+| --- | --- | :---: | --- |
+| listTools | `() => Promise<{ tools: McpTool[] }>` | ✓ | Lists available tools |
+| callTool | `(params: { name, arguments }) => Promise<McpToolCallResult>` | ✓ | Calls a tool by name |
+| listResources | `() => Promise<{ resources: McpResource[] }>` | ✗ | Lists available resources |
+| readResource | `(params: { uri }) => Promise<{ contents: unknown[] }>` | ✗ | Reads a resource by URI |
+
+#### `kvCache`
+
+Cache files are organized hierarchically: `{kvCacheKey}/{modelId}/{configHash}.bin`
+
+The `configHash` includes model config + system prompt to ensure cache isolation.
+
+| Value | Behavior |
+| --- | --- |
+| `true` | Auto-generate cache key based on conversation history |
+| `"custom-key"` | Use provided string as cache key for manual session management |
+| `false` / `undefined` | No caching |
+
+When cache exists, only the last message is sent to the model (includes multimodal attachments). Use [`deleteCache()`](../deleteCache) to remove cached sessions.
+
+#### `GenerationParams`
+
+Optional sampling and generation parameters (strict — no extra keys allowed):
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| temp | `number` | ✗ | Temperature (0–2) |
+| top_p | `number` | ✗ | Top-p (nucleus) sampling (0–1) |
+| top_k | `number` | ✗ | Top-k sampling |
+| predict | `number` | ✗ | Max tokens to predict. `-1` = until stop token, `-2` = until context filled |
+| seed | `number` | ✗ | Random seed for reproducibility |
+| frequency_penalty | `number` | ✗ | Frequency penalty |
+| presence_penalty | `number` | ✗ | Presence penalty |
+| repeat_penalty | `number` | ✗ | Repeat penalty |
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| tokenStream | `AsyncGenerator<string>` | Stream of generated tokens |
+| toolCallStream | `AsyncGenerator<`[`ToolCallEvent`](#toolcallevent)`>` | Stream of tool call events |
+| text | `Promise<string>` | Complete generated text (resolves after stream ends) |
+| toolCalls | `Promise<`[`ToolCallWithCall[]`](#toolcallwithcall)`>` | Tool calls made during completion (may include `invoke()` when handler is available) |
+| stats | `Promise<`[`CompletionStats`](#completionstats) `\| undefined>` | Performance statistics |
+
+### `CompletionStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| timeToFirstToken | `number` | Time to first token in milliseconds |
+| tokensPerSecond | `number` | Tokens generated per second |
+| cacheTokens | `number` | Number of cached tokens |
+| backendDevice | `"cpu" \| "gpu" \| undefined` | Compute backend used for inference |
+
+### `ToolCallEvent`
+
+Discriminated union on `type`. One of:
+
+**Tool call:**
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"toolCall"` | Event type |
+| call.id | `string` | Call identifier |
+| call.name | `string` | Tool name |
+| call.arguments | `Record<string, unknown>` | Tool arguments |
+| call.raw | `string` | Raw call text (optional) |
+
+**Tool call error:**
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"toolCallError"` | Event type |
+| error.code | `"PARSE_ERROR" \| "VALIDATION_ERROR" \| "UNKNOWN_TOOL"` | Error code |
+| error.message | `string` | Error message |
+| error.raw | `string` | Raw text (optional) |
+
+### `ToolCallWithCall`
+
+Extends `ToolCall` with an optional `invoke()` method:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| id | `string` | Call identifier |
+| name | `string` | Tool name |
+| arguments | `Record<string, unknown>` | Tool arguments |
+| raw | `string` | Raw call text (optional) |
+| invoke | `() => Promise<unknown>` | Executes the tool handler (present when a matching handler was provided) |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_TOOLS_ARRAY` | Invalid tools array provided |
+| `INVALID_TOOL_SCHEMA` | A tool has an invalid schema |
+
+## Example
+
+```typescript
+import { z } from "zod";
+
+const result = completion({
+  modelId: "llama-2",
+  history: [
+    { role: "user", content: "What's the weather in Tokyo?" }
+  ],
+  stream: true,
+  tools: [{
+    name: "get_weather",
+    description: "Get current weather",
+    parameters: z.object({
+      city: z.string().describe("City name"),
+    }),
+    handler: async (args) => {
+      return { temperature: 22, condition: "sunny" };
+    }
+  }],
+  generationParams: {
+    temp: 0.7,
+    top_p: 0.9,
+  }
+});
+
+for await (const token of result.tokenStream) {
+  process.stdout.write(token);
+}
+
+for (const toolCall of await result.toolCalls) {
+  if (toolCall.invoke) {
+    const toolResult = await toolCall.invoke();
+    console.log(toolResult);
+  }
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/defineDuplexHandler.mdx b/docs/website/content/docs/v0.9.0/sdk/api/defineDuplexHandler.mdx
new file mode 100644
index 0000000000..4e3522eada
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/defineDuplexHandler.mdx
@@ -0,0 +1,31 @@
+---
+title: "defineDuplexHandler( )"
+titleStyle: code
+description: Helper function to define a duplex (bidirectional streaming) handler with full type inference.
+---
+
+```ts
+function defineDuplexHandler<TRequest extends ZodType, TResponse extends ZodType>(
+  definition: DuplexPluginHandlerDefinition<TRequest, TResponse>
+): PluginHandlerDefinition<TRequest, TResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| definition | [`DuplexPluginHandlerDefinition`](#duplexpluginhandlerdefinition) | ✓ | The duplex handler definition with schemas and handler function |
+
+### `DuplexPluginHandlerDefinition`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| requestSchema | `ZodType` | ✓ | Zod schema for validating incoming requests |
+| responseSchema | `ZodType` | ✓ | Zod schema for validating outgoing responses |
+| streaming | `true` | ✓ | Must be `true` — duplex handlers are always streaming |
+| duplex | `true` | ✓ | Must be `true` — marks this handler as bidirectional |
+| handler | `(request, inputStream: AsyncIterable<Buffer>) => AsyncGenerator<response>` | ✓ | The handler function — receives a validated request and an input stream, yields validated response chunks |
+
+## Returns
+
+`PluginHandlerDefinition<TRequest, TResponse>` — The same definition object, with full type inference applied. This is an identity function used for type checking.
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/defineHandler.mdx b/docs/website/content/docs/v0.9.0/sdk/api/defineHandler.mdx
new file mode 100644
index 0000000000..ba8aa6ab2c
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/defineHandler.mdx
@@ -0,0 +1,30 @@
+---
+title: "defineHandler( )"
+titleStyle: code
+description: Helper function to define a handler with full type inference.
+---
+
+```ts
+function defineHandler<TRequest extends ZodType, TResponse extends ZodType>(
+  definition: PluginHandlerDefinition<TRequest, TResponse>
+): PluginHandlerDefinition<TRequest, TResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| definition | [`PluginHandlerDefinition`](#pluginhandlerdefinition) | ✓ | The handler definition with schemas and handler function |
+
+### `PluginHandlerDefinition`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| requestSchema | `ZodType` | ✓ | Zod schema for validating incoming requests |
+| responseSchema | `ZodType` | ✓ | Zod schema for validating outgoing responses |
+| streaming | `boolean` | ✓ | Whether this handler uses streaming responses |
+| handler | `(request) => Promise<response> \| AsyncGenerator<response>` | ✓ | The handler function — receives a validated request and returns a validated response |
+
+## Returns
+
+`PluginHandlerDefinition<TRequest, TResponse>` — The same definition object, with full type inference applied. This is an identity function used for type checking.
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/definePlugin.mdx b/docs/website/content/docs/v0.9.0/sdk/api/definePlugin.mdx
new file mode 100644
index 0000000000..eca1b05fc7
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/definePlugin.mdx
@@ -0,0 +1,68 @@
+---
+title: "definePlugin( )"
+titleStyle: code
+description: Helper function to define a plugin with full type inference.
+---
+
+```ts
+function definePlugin<T extends QvacPlugin>(plugin: T): T;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| plugin | [`QvacPlugin`](#qvacplugin) | ✓ | The plugin definition |
+
+### `QvacPlugin`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelType | `string` | ✓ | Unique identifier for the model type this plugin handles |
+| displayName | `string` | ✓ | Human-readable name for the plugin |
+| addonPackage | `string` | ✓ | The npm package name of the addon this plugin wraps |
+| createModel | `(params:` [`CreateModelParams`](#createmodelparams)`) =>` [`PluginModelResult`](#pluginmodelresult) | ✓ | Factory function that creates a model instance |
+| handlers | `Record<string,` [`PluginHandlerDefinition`](../defineHandler#pluginhandlerdefinition)`>` | ✓ | Map of handler names to handler definitions |
+| logging | [`PluginLogging`](#pluginlogging) | ✗ | Optional logging configuration |
+| loadConfigSchema | `ZodType` | ✗ | Zod schema used to validate and parse the `modelConfig` passed to `loadModel()` |
+| resolveConfig | `(modelConfig, ctx) => Promise<ResolveResult>` | ✗ | Optional hook to resolve model sources in modelConfig to local paths. Called before `createModel` if the plugin needs to download/resolve artifacts. Returns transformed config and optional artifact paths. |
+| skipPrimaryModelPathValidation | `boolean` | ✗ | When true, skips file-existence validation for modelPath. Use for plugins that derive paths from config. |
+
+### `CreateModelParams`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The model identifier |
+| modelPath | `string` | ✓ | Path to the model file |
+| modelConfig | `Record<string, unknown>` | ✗ | Model-specific configuration |
+| modelName | `string` | ✗ | Human-readable model name |
+| artifacts | `Record<string, string>` | ✗ | Additional file paths (e.g., `projectionModelPath`, `vadModelPath`, `ttsConfigModelPath`) |
+
+### `PluginModelResult`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| model | [`PluginModel`](#pluginmodel) | The model instance |
+| loader | `unknown` | Engine-specific loader reference |
+
+### `PluginModel`
+
+| Method | Signature | Required? | Description |
+| --- | --- | :---: | --- |
+| load | `(force?: boolean) => Promise<void>` | ✓ | Loads the model into memory |
+| unload | `() => void \| Promise<void>` | ✗ | Releases model resources |
+
+### `PluginLogging`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| module | `unknown` | ✓ | The addon logging module |
+| namespace | `string` | ✓ | Logger namespace |
+
+## Returns
+
+`T` — The same plugin object, with full type inference applied. This is an identity function used for type checking.
+
+## Note
+
+See the [Write a custom plugin](/sdk/examples/utilities/write-custom-plugin) guide for a full walkthrough.
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/deleteCache.mdx b/docs/website/content/docs/v0.9.0/sdk/api/deleteCache.mdx
new file mode 100644
index 0000000000..d7ec742621
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/deleteCache.mdx
@@ -0,0 +1,56 @@
+---
+title: "deleteCache( )"
+titleStyle: code
+description: Deletes KV cache files.
+---
+
+```ts
+function deleteCache(params: DeleteCacheParams): Promise<{ success: boolean }>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`DeleteCacheParams`](#deletecacheparams) | ✓ | The delete cache parameters |
+
+### `DeleteCacheParams`
+
+Union type. One of:
+
+#### Delete all caches
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| all | `true` | ✓ | Deletes all cache files |
+
+#### Delete by cache key
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| kvCacheKey | `string` | ✓ | The cache key to delete |
+| modelId | `string` | ✗ | Specific model ID to delete within the cache key. If not provided, deletes the entire cache key. |
+
+## Returns
+
+`Promise<{ success: boolean }>` — Resolves with the success status.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_DELETE_CACHE_PARAMS` | Neither `all` nor `kvCacheKey` was provided |
+| `DELETE_CACHE_FAILED` | The server reports cache deletion failure |
+
+## Examples
+
+```typescript
+// Delete all caches
+await deleteCache({ all: true });
+
+// Delete entire cache key (all models)
+await deleteCache({ kvCacheKey: "my-session" });
+
+// Delete only specific model within cache key
+await deleteCache({ kvCacheKey: "my-session", modelId: "model-abc123" });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/diffusion.mdx b/docs/website/content/docs/v0.9.0/sdk/api/diffusion.mdx
new file mode 100644
index 0000000000..256212e23a
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/diffusion.mdx
@@ -0,0 +1,107 @@
+---
+title: "diffusion( )"
+titleStyle: code
+description: Generates images using a loaded diffusion model.
+---
+
+```ts
+function diffusion(params: DiffusionClientParams): {
+  progressStream: AsyncGenerator<DiffusionProgressTick>;
+  outputs: Promise<Uint8Array[]>;
+  stats: Promise<DiffusionStats | undefined>;
+};
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`DiffusionClientParams`](#diffusionclientparams) | ✓ | The diffusion parameters |
+
+### `DiffusionClientParams`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the loaded diffusion model |
+| prompt | `string` | ✓ | Text prompt describing the image to generate |
+| negative_prompt | `string` | ✗ | Text describing what to avoid in the generated image |
+| width | `number` | ✗ | Image width in pixels (must be a multiple of 8) |
+| height | `number` | ✗ | Image height in pixels (must be a multiple of 8) |
+| steps | `number` | ✗ | Number of diffusion steps |
+| cfg_scale | `number` | ✗ | Classifier-free guidance scale for SD 1.x / 2.x / XL / SD3 models (typical range 1–20, default 7) |
+| guidance | `number` | ✗ | Distilled guidance for FLUX models (typical range 1–10, default 3.5) |
+| sampling_method | [`SamplingMethod`](#samplingmethod) | ✗ | Sampling algorithm |
+| scheduler | [`Scheduler`](#scheduler) | ✗ | Noise scheduler |
+| seed | `number` | ✗ | Random seed for reproducibility |
+| batch_count | `number` | ✗ | Number of images to generate |
+| vae_tiling | `boolean` | ✗ | Enable VAE tiling for large images on limited VRAM |
+| cache_preset | `string` | ✗ | Cache preset identifier |
+
+#### `SamplingMethod`
+
+`"euler" | "euler_a" | "heun" | "dpm2" | "dpm++2m" | "dpm++2mv2" | "dpm++2s_a" | "lcm" | "ipndm" | "ipndm_v" | "ddim_trailing" | "tcd" | "res_multistep" | "res_2s"`
+
+#### `Scheduler`
+
+`"discrete" | "karras" | "exponential" | "ays" | "gits" | "sgm_uniform" | "simple" | "lcm" | "smoothstep" | "kl_optimal" | "bong_tangent"`
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| progressStream | `AsyncGenerator<`[`DiffusionProgressTick`](#diffusionprogresstick)`>` | Stream of generation progress ticks |
+| outputs | `Promise<Uint8Array[]>` | Generated image buffers (resolves when generation completes) |
+| stats | `Promise<`[`DiffusionStats`](#diffusionstats) `\| undefined>` | Performance statistics |
+
+### `DiffusionProgressTick`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| step | `number` | Current diffusion step |
+| totalSteps | `number` | Total number of steps |
+| elapsedMs | `number` | Elapsed time in milliseconds |
+
+### `DiffusionStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| modelLoadMs | `number \| undefined` | Model loading time in milliseconds |
+| generationMs | `number \| undefined` | Single generation time in milliseconds |
+| totalGenerationMs | `number \| undefined` | Total generation time in milliseconds |
+| totalWallMs | `number \| undefined` | Total wall-clock time in milliseconds |
+| totalSteps | `number \| undefined` | Total diffusion steps performed |
+| totalGenerations | `number \| undefined` | Number of generations completed |
+| totalImages | `number \| undefined` | Number of images produced |
+| totalPixels | `number \| undefined` | Total pixels generated |
+| width | `number \| undefined` | Output image width |
+| height | `number \| undefined` | Output image height |
+| seed | `number \| undefined` | Seed used for generation |
+
+## Example
+
+```typescript
+import fs from "fs";
+
+// Basic usage
+const { outputs, stats } = diffusion({ modelId, prompt: "a cat" });
+const buffers = await outputs;
+fs.writeFileSync("output.png", buffers[0]);
+
+// With progress tracking
+const { progressStream, outputs: images } = diffusion({
+  modelId,
+  prompt: "a cat sitting on a windowsill",
+  width: 512,
+  height: 512,
+  steps: 20,
+  cfg_scale: 7,
+});
+
+for await (const { step, totalSteps } of progressStream) {
+  console.log(`${step}/${totalSteps}`);
+}
+
+const imageBuffers = await images;
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/downloadAsset.mdx b/docs/website/content/docs/v0.9.0/sdk/api/downloadAsset.mdx
new file mode 100644
index 0000000000..ac920e4f0b
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/downloadAsset.mdx
@@ -0,0 +1,54 @@
+---
+title: "downloadAsset( )"
+titleStyle: code
+description: Downloads an asset (model file) without loading it into memory.
+---
+
+```ts
+function downloadAsset(options: DownloadAssetOptions, rpcOptions?: RPCOptions): Promise<string>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`DownloadAssetOptions`](#downloadassetoptions) | ✓ | Download configuration |
+| rpcOptions | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `DownloadAssetOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| assetSrc | `string` | ✓ | The location from which the asset is downloaded (local path, remote URL, or Hyperdrive `pear://` URL) |
+| seed | `boolean` | ✗ | Whether to seed the asset on Hyperdrive after download |
+| onProgress | `(progress:` [`ModelProgressUpdate`](../loadModel#modelprogressupdate)`) => void` | ✗ | Callback for real-time download progress updates. When provided, streaming is used. |
+
+## Returns
+
+`Promise<string>` — Resolves to the asset ID (either the provided `assetSrc` or a generated ID).
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `DOWNLOAD_ASSET_FAILED` | The download operation fails |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Streaming ends without a final response (when using `onProgress`) |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"downloadAsset"` |
+
+## Example
+
+```typescript
+// Download model without loading
+const assetId = await downloadAsset({
+  assetSrc: "/path/to/model.gguf",
+  seed: true
+});
+
+// Download with progress tracking
+const assetIdWithProgress = await downloadAsset({
+  assetSrc: "pear://key123/model.gguf",
+  onProgress: (progress) => {
+    console.log(`Downloaded: ${progress.percentage}%`);
+  }
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/embed.mdx b/docs/website/content/docs/v0.9.0/sdk/api/embed.mdx
new file mode 100644
index 0000000000..7eb4d29f2c
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/embed.mdx
@@ -0,0 +1,67 @@
+---
+title: "embed( )"
+titleStyle: code
+description: Generates embeddings for one or more texts using a specified model.
+---
+
+```ts
+function embed(params: { modelId: string; text: string }, options?: RPCOptions): Promise<{ embedding: number[]; stats?: EmbedStats }>;
+function embed(params: { modelId: string; text: string[] }, options?: RPCOptions): Promise<{ embedding: number[][]; stats?: EmbedStats }>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`EmbedParams`](#embedparams) | ✓ | The embedding parameters |
+| options | [`RPCOptions`](./shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `EmbedParams`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the embedding model to use |
+| text | `string \| string[]` | ✓ | The input text(s) to embed. A single string returns `number[]`; an array returns `number[][]`. |
+
+## Returns
+
+`Promise<object>` — Resolves to an object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| embedding | `number[] \| number[][]` | The embedding vector(s). Single `number[]` when `text` is a string; `number[][]` when `text` is an array. |
+| stats | [`EmbedStats`](#embedstats) `\| undefined` | Performance statistics |
+
+### `EmbedStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| totalTime | `number \| undefined` | Total embedding time in milliseconds |
+| tokensPerSecond | `number \| undefined` | Tokens processed per second |
+| totalTokens | `number \| undefined` | Total tokens processed |
+| backendDevice | `"cpu" \| "gpu" \| undefined` | Compute backend used for inference |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"embed"` |
+
+## Examples
+
+```typescript
+// Single text
+const { embedding, stats } = await embed({
+  modelId: "embedding-model",
+  text: "Hello world",
+});
+console.log(embedding.length); // e.g. 384
+console.log(stats?.tokensPerSecond);
+
+// Multiple texts (batch)
+const { embedding: vectors } = await embed({
+  modelId: "embedding-model",
+  text: ["Hello world", "How are you?"],
+});
+console.log(vectors.length); // 2
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/errors.mdx b/docs/website/content/docs/v0.9.0/sdk/api/errors.mdx
new file mode 100644
index 0000000000..273f237ff3
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/errors.mdx
@@ -0,0 +1,142 @@
+---
+title: Errors
+description: SDK error codes reference
+---
+
+## Example
+
+```typescript
+import { SDK_CLIENT_ERROR_CODES, SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await loadModel({ modelSrc: "/path/to/model.gguf", modelType: "llm" });
+} catch (error) {
+  if (error.code === SDK_SERVER_ERROR_CODES.MODEL_LOAD_FAILED) {
+    // handle model load failure
+  }
+}
+```
+
+## Client errors
+
+Thrown on the client side (response validation, RPC, provider). Access via `SDK_CLIENT_ERROR_CODES.{ERROR_NAME}`.
+
+| Error | Code | Summary | Thrown by |
+| --- | --- | --- | --- |
+| `INVALID_RESPONSE_TYPE` | 50001 | Invalid response type received. | `cancel()`, `downloadAsset()`, `embed()`, `finetune()`, `getModelInfo()`, `heartbeat()`, `loadModel()`, `loggingStream()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()`, `resume()`, `startQVACProvider()`, `stopQVACProvider()`, `suspend()`, `unloadModel()` |
+| `INVALID_OPERATION_IN_RESPONSE` | 50002 | Response operation didn't match the expected RAG operation. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | 50003 | Streaming RPC ended without a final response. | `downloadAsset()`, `finetune()`, `loadModel()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `INVALID_AUDIO_CHUNK_TYPE` | 50004 | Invalid audio chunk input type provided. | `transcribe()`, `transcribeStream()` |
+| `INVALID_TOOLS_ARRAY` | 50005 | Invalid tools array provided. | `completion()` |
+| `INVALID_TOOL_SCHEMA` | 50006 | Invalid tool schema provided. | `completion()` |
+| `OCR_FAILED` | 50007 | OCR operation failed. | `ocr()` |
+| `RPC_NO_HANDLER` | 50200 | No handler registered for request type. | Internal RPC layer |
+| `RPC_REQUEST_NOT_SENT` | 50201 | Request has not been sent yet. | Internal RPC layer |
+| `RPC_RESPONSE_STREAM_NOT_CREATED` | 50202 | Response stream not created. | Internal RPC layer |
+| `RPC_CONNECTION_FAILED` | 50203 | RPC connection failed. | Any API call (startup/transport) |
+| `PROVIDER_START_FAILED` | 50400 | Failed to start provider. | `startQVACProvider()` |
+| `PROVIDER_STOP_FAILED` | 50401 | Failed to stop provider. | `stopQVACProvider()` |
+| `DELEGATE_NO_FINAL_RESPONSE` | 50402 | No final response received from delegated provider. | `loadModel()`, `completion()` |
+| `DELEGATE_PROVIDER_ERROR` | 50403 | Delegated provider returned an error. | `loadModel()`, `completion()` |
+| `DELEGATE_CONNECTION_FAILED` | 50404 | Failed to connect to delegated provider. | `loadModel()`, `completion()` |
+| `SDK_NOT_FOUND_IN_NODE_MODULES` | 50600 | QVAC SDK not found in node_modules. | Any API call (during SDK initialization) |
+| `WORKER_FILE_NOT_FOUND` | 50601 | Worker file not found. | Any API call (during SDK initialization) |
+| `CONFIG_FILE_NOT_FOUND` | 50602 | Config file not found. | Any API call (during SDK initialization) |
+| `CONFIG_FILE_INVALID` | 50603 | Config file is invalid. | Any API call (during SDK initialization) |
+| `CONFIG_FILE_PARSE_FAILED` | 50604 | Failed to import/parse a `qvac.config.*` file. | Any API call (during SDK initialization) |
+| `CONFIG_VALIDATION_FAILED` | 50605 | Config validation failed (schema mismatch / invalid JSON). | Any API call (during SDK initialization) |
+| `PEAR_WORKER_ENTRY_REQUIRED` | 50606 | No plugins registered; Pear worker entry required. | Any API call (during SDK initialization) |
+| `MULTIPLE_SDK_INSTALLATIONS` | 50607 | Multiple QVAC SDK installations found. | Any API call (during SDK initialization) |
+| `PROFILER_INVALID_CAPACITY` | 50800 | Ring buffer capacity is below minimum. | `profiler.enable()` |
+
+## Server errors
+
+Thrown by the server (model operations, downloads, cache, RAG). Access via `SDK_SERVER_ERROR_CODES.{ERROR_NAME}`.
+
+| Error | Code | Summary | Thrown by |
+| --- | --- | --- | --- |
+| `MODEL_ALREADY_REGISTERED` | 52001 | Model with the same ID is already registered. | `loadModel()` |
+| `MODEL_NOT_FOUND` | 52002 | Model ID not found in the registry. | `completion()`, `embed()`, `getModelInfo()`, `loggingStream()`, `ocr()`, `textToSpeech()`, `transcribe()`, `transcribeStream()`, `translate()`, `unloadModel()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `MODEL_NOT_LOADED` | 52003 | Model exists but is not loaded. | `completion()`, `embed()`, `ocr()`, `textToSpeech()`, `transcribe()`, `transcribeStream()`, `translate()` |
+| `MODEL_IS_DELEGATED` | 52004 | Model is delegated and cannot be accessed as a local model. | `completion()`, `embed()`, `ocr()`, `textToSpeech()`, `transcribe()`, `transcribeStream()`, `translate()` |
+| `UNKNOWN_MODEL_TYPE` | 52005 | Unknown `modelType` in `loadModel()`. | `loadModel()` |
+| `MODEL_LOAD_FAILED` | 52200 | Failed to load model (generic). | `loadModel()` |
+| `MODEL_FILE_NOT_FOUND` | 52201 | Model file not found at given path. | `loadModel()` |
+| `MODEL_FILE_NOT_FOUND_IN_DIR` | 52202 | Expected model file not found in directory (e.g., by type). | `loadModel()` |
+| `MODEL_FILE_LOCATE_FAILED` | 52203 | Failed to locate model file for the given `modelType`. | `loadModel()` |
+| `PROJECTION_MODEL_REQUIRED` | 52204 | Projection model source is required for multimodal LLM models. | `loadModel()` |
+| `VAD_MODEL_REQUIRED` | 52205 | VAD model source is required for this configuration. | `loadModel()` |
+| `TTS_ARTIFACTS_REQUIRED` | 52208 | TTS (Chatterbox) model artifacts are missing. | `loadModel()` |
+| `TTS_REFERENCE_AUDIO_REQUIRED` | 52209 | TTS (Chatterbox) reference audio is required for voice cloning. | `loadModel()` |
+| `PARAKEET_ARTIFACTS_REQUIRED` | 52210 | Parakeet model sources are missing. | `loadModel()` |
+| `MODEL_UNLOAD_FAILED` | 52400 | Failed to unload model. | `unloadModel()` |
+| `EMBED_FAILED` | 52401 | Failed to generate embeddings. | `embed()` |
+| `EMBED_NO_EMBEDDINGS` | 52402 | No embeddings returned from model. | `embed()` |
+| `TRANSCRIPTION_FAILED` | 52403 | Transcription failed. | `transcribe()`, `transcribeStream()` |
+| `AUDIO_FILE_NOT_FOUND` | 52404 | Audio file not found or not accessible. | `transcribe()`, `transcribeStream()` |
+| `TRANSLATION_FAILED` | 52405 | Translation failed. | `translate()` |
+| `COMPLETION_FAILED` | 52406 | Completion failed. | `completion()` |
+| `ATTACHMENT_NOT_FOUND` | 52407 | Attachment file not found at path. | `completion()` |
+| `CANCEL_FAILED` | 52408 | Failed to cancel operation. | `cancel()` |
+| `TEXT_TO_SPEECH_FAILED` | 52409 | Text-to-speech operation failed. | `textToSpeech()` |
+| `CONFIG_RELOAD_NOT_SUPPORTED` | 52410 | Model does not support hot config reload. | `transcribe()`, `transcribeStream()` |
+| `MODEL_TYPE_MISMATCH` | 52411 | Model type mismatch (expected vs provided). | `completion()`, `embed()`, `ocr()`, `textToSpeech()`, `transcribe()`, `transcribeStream()`, `translate()`, `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `OCR_FAILED` | 52412 | OCR operation failed. | `ocr()` |
+| `IMAGE_FILE_NOT_FOUND` | 52413 | Image file not found or not accessible. | `ocr()` |
+| `INVALID_IMAGE_INPUT` | 52414 | Invalid image input type provided. | `ocr()` |
+| `RAG_SAVE_FAILED` | 52800 | Failed to save embeddings. | `ragSaveEmbeddings()` |
+| `RAG_SEARCH_FAILED` | 52801 | Failed to search embeddings. | `ragSearch()` |
+| `RAG_DELETE_FAILED` | 52802 | Failed to delete embeddings. | `ragDeleteEmbeddings()` |
+| `RAG_UNKNOWN_OPERATION` | 52803 | Unknown RAG operation. | `ragIngest()`, `ragReindex()` |
+| `RAG_HYPERDB_FAILED` | 52804 | HyperDB RAG operation failed. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `RAG_WORKSPACE_MODEL_MISMATCH` | 52805 | Workspace is configured for a different embeddings model. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `RAG_WORKSPACE_NOT_FOUND` | 52806 | RAG workspace not found. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `RAG_WORKSPACE_IN_USE` | 52807 | RAG workspace is in use and can't be closed/deleted. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `RAG_WORKSPACE_CLOSE_FAILED` | 52808 | Failed to close RAG workspace. | `ragCloseWorkspace()` |
+| `RAG_LIST_WORKSPACES_FAILED` | 52809 | Failed to list RAG workspaces. | `ragListWorkspaces()` |
+| `RAG_CHUNK_FAILED` | 52810 | Failed to chunk input into embeddings. | `ragChunk()`, `ragSaveEmbeddings()` |
+| `RAG_WORKSPACE_NOT_OPEN` | 52811 | RAG workspace is not open. | `ragDeleteEmbeddings()`, `ragSaveEmbeddings()`, `ragSearch()` |
+| `FILE_NOT_FOUND` | 53000 | File not found. | `loadModel()`, `downloadAsset()` |
+| `DOWNLOAD_CANCELLED` | 53001 | Download cancelled. | `cancel()`, `downloadAsset()`, `loadModel()` |
+| `CHECKSUM_VALIDATION_FAILED` | 53002 | Downloaded file checksum validation failed. | `downloadAsset()`, `loadModel()` |
+| `HTTP_ERROR` | 53003 | HTTP request failed with status code. | `downloadAsset()`, `loadModel()` |
+| `NO_RESPONSE_BODY` | 53004 | No response body received from HTTP request. | `downloadAsset()`, `loadModel()` |
+| `RESPONSE_BODY_NOT_READABLE` | 53005 | Response body is not readable. | `downloadAsset()`, `loadModel()` |
+| `NO_BLOB_FOUND` | 53006 | No blob found for the requested resource. | `downloadAsset()`, `loadModel()` |
+| `DOWNLOAD_ASSET_FAILED` | 53007 | Download failed. | `downloadAsset()`, `loadModel()` |
+| `SEEDING_NOT_SUPPORTED` | 53008 | Seeding is only supported for Hyperdrive models. | `loadModel()` |
+| `HYPERDRIVE_DOWNLOAD_FAILED` | 53009 | Hyperdrive download failed. | `downloadAsset()`, `loadModel()` |
+| `INVALID_SHARD_URL_PATTERN` | 53010 | Invalid shard URL pattern for sharded downloads. | `downloadAsset()`, `loadModel()` |
+| `ARCHIVE_EXTRACTION_FAILED` | 53011 | Failed to extract an archive. | `downloadAsset()`, `loadModel()` |
+| `ARCHIVE_UNSUPPORTED_TYPE` | 53012 | Unsupported archive type. | `downloadAsset()`, `loadModel()` |
+| `ARCHIVE_MISSING_SHARDS` | 53013 | Archive is missing required shards. | `downloadAsset()`, `loadModel()` |
+| `PARTIAL_DOWNLOAD_OFFLINE` | 53014 | Partial download exists but offline prevents resuming. | `downloadAsset()`, `loadModel()` |
+| `REGISTRY_DOWNLOAD_FAILED` | 53015 | Registry download failed. | `downloadAsset()`, `loadModel()` |
+| `DELETE_CACHE_FAILED` | 53200 | Failed to delete cache. | `deleteCache()` |
+| `INVALID_DELETE_CACHE_PARAMS` | 53201 | Invalid parameters for `deleteCache()`. | `deleteCache()` |
+| `CACHE_DIR_NOT_ABSOLUTE` | 53202 | Cache directory must be an absolute path. | `deleteCache()` |
+| `CACHE_DIR_NOT_WRITABLE` | 53203 | Cache directory is not writable. | `deleteCache()` |
+| `SET_CONFIG_FAILED` | 53350 | Failed to set config. | Any API call (during SDK initialization) |
+| `CONFIG_ALREADY_SET` | 53351 | Config is immutable and has already been set. | Any API call (during SDK initialization) |
+| `FFMPEG_NOT_AVAILABLE` | 53500 | Audio decoding required but FFmpeg is not available. | `transcribe()`, `transcribeStream()` |
+| `AUDIO_PLAYER_FAILED` | 53501 | Audio player failed. | `textToSpeech()` |
+| `INVALID_AUDIO_CHUNK_TYPE` | 53502 | Invalid audio chunk type. | `transcribe()`, `transcribeStream()` |
+| `DELEGATE_NO_FINAL_RESPONSE` | 53700 | No final response received from delegated provider. | `completion()`, `loadModel()` |
+| `DELEGATE_CONNECTION_FAILED` | 53701 | Failed to connect to delegated provider. | `completion()`, `loadModel()` |
+| `DELEGATE_PROVIDER_ERROR` | 53702 | Delegated provider returned an error. | `completion()`, `loadModel()` |
+| `RPC_NO_DATA_RECEIVED` | 53703 | No data received from request. | Internal server RPC |
+| `RPC_UNKNOWN_REQUEST_TYPE` | 53704 | Unknown request type received. | Internal server RPC |
+| `LIFECYCLE_SUSPEND_FAILED` | 53600 | Failed to suspend one or more resources. | `suspend()` |
+| `LIFECYCLE_RESUME_FAILED` | 53601 | Failed to resume one or more resources. | `resume()` |
+| `PLUGIN_NOT_FOUND` | 53850 | Plugin not found for the specified model type. | `invokePlugin()`, `invokePluginStream()`, `loadModel()` |
+| `PLUGIN_HANDLER_NOT_FOUND` | 53851 | Handler not found in plugin. | `invokePlugin()`, `invokePluginStream()` |
+| `PLUGIN_REQUEST_VALIDATION_FAILED` | 53852 | Plugin request validation failed. | `invokePlugin()`, `invokePluginStream()` |
+| `PLUGIN_RESPONSE_VALIDATION_FAILED` | 53853 | Plugin response validation failed. | `invokePlugin()`, `invokePluginStream()` |
+| `PLUGIN_ALREADY_REGISTERED` | 53854 | Plugin already registered for model type. | `definePlugin()` |
+| `PLUGIN_HANDLER_TYPE_MISMATCH` | 53855 | Handler type mismatch (streaming vs non-streaming). | `invokePlugin()`, `invokePluginStream()` |
+| `PLUGIN_LOGGING_INVALID` | 53856 | Plugin has invalid logging configuration. | `definePlugin()` |
+| `PLUGIN_DEFINITION_INVALID` | 53857 | Plugin definition is invalid. | `definePlugin()` |
+| `PLUGIN_MODEL_TYPE_RESERVED` | 53858 | Model type is reserved for built-in plugins. | `definePlugin()` |
+| `PLUGIN_LOAD_CONFIG_VALIDATION_FAILED` | 53859 | Model config validation failed for plugin. | `loadModel()` |
+| `PATH_TRAVERSAL` | 53900 | Path traversal detected. | `loadModel()`, `downloadAsset()` |
+| `QVAC_MODEL_REGISTRY_QUERY_FAILED` | 53950 | Model registry query failed. | `modelRegistryGetModel()`, `modelRegistryList()`, `modelRegistrySearch()` |
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/finetune.mdx b/docs/website/content/docs/v0.9.0/sdk/api/finetune.mdx
new file mode 100644
index 0000000000..43c8e21271
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/finetune.mdx
@@ -0,0 +1,189 @@
+---
+title: "finetune( )"
+titleStyle: code
+description: Starts, resumes, inspects, pauses, or cancels a finetuning job for a loaded model.
+---
+
+```ts
+function finetune(params: FinetuneRunParams, rpcOptions?: RPCOptions): FinetuneHandle;
+function finetune(params: FinetuneStopParams | FinetuneGetStateParams, rpcOptions?: RPCOptions): Promise<FinetuneResult>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`FinetuneRunParams`](#finetunerunparams) \| [`FinetuneStopParams`](#finetunestopparams) \| [`FinetuneGetStateParams`](#finetunegetstateparams) | ✓ | The finetuning parameters — shape determines the overload |
+| rpcOptions | [`RPCOptions`](./shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `FinetuneRunParams`
+
+Used to start or resume a finetuning job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the loaded model to finetune |
+| operation | `"start" \| "resume"` | ✗ | Omit to let the add-on choose whether to start fresh or resume automatically |
+| options | [`FinetuneOptions`](#finetuneoptions) | ✓ | Finetuning configuration |
+
+### `FinetuneStopParams`
+
+Used to pause or cancel a running job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the model |
+| operation | `"pause" \| "cancel"` | ✓ | The stop operation |
+
+### `FinetuneGetStateParams`
+
+Used to inspect the current state of a finetuning job.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The identifier of the model |
+| operation | `"getState"` | ✓ | Must be `"getState"` |
+| options | [`FinetuneOptions`](#finetuneoptions) | ✓ | Finetuning configuration |
+
+### `FinetuneOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| trainDatasetDir | `string` | ✓ | Directory containing the training dataset |
+| validation | [`FinetuneValidation`](#finetunevalidation) | ✓ | Validation configuration |
+| outputParametersDir | `string` | ✓ | Directory where output adapter parameters are written |
+| numberOfEpochs | `number` | ✗ | Number of epochs to run |
+| learningRate | `number` | ✗ | Learning rate override |
+| contextLength | `number` | ✗ | Context length override |
+| batchSize | `number` | ✗ | Batch size override |
+| microBatchSize | `number` | ✗ | Micro batch size override |
+| assistantLossOnly | `boolean` | ✗ | Compute loss only on assistant tokens |
+| loraRank | `number` | ✗ | LoRA rank override |
+| loraAlpha | `number` | ✗ | LoRA alpha override |
+| loraInitStd | `number` | ✗ | LoRA initialization standard deviation |
+| loraSeed | `number` | ✗ | LoRA initialization seed |
+| loraModules | `string` | ✗ | Comma-separated LoRA module selection |
+| checkpointSaveDir | `string` | ✗ | Directory for checkpoint snapshots |
+| checkpointSaveSteps | `number` | ✗ | Checkpoint save interval (in steps) |
+| chatTemplatePath | `string` | ✗ | Custom chat template path |
+| lrScheduler | `"constant" \| "cosine" \| "linear"` | ✗ | Learning rate scheduler |
+| lrMin | `number` | ✗ | Minimum learning rate |
+| warmupRatio | `number` | ✗ | Warmup ratio (0–1) |
+| warmupRatioSet | `boolean` | ✗ | Enable warmup ratio |
+| warmupSteps | `number` | ✗ | Warmup step count |
+| warmupStepsSet | `boolean` | ✗ | Enable explicit warmup steps |
+| weightDecay | `number` | ✗ | Weight decay override |
+
+#### `FinetuneValidation`
+
+Discriminated union on `type`:
+
+| Variant | Fields | Description |
+| --- | --- | --- |
+| `{ type: "none" }` | — | No validation |
+| `{ type: "split", fraction?: number }` | fraction defaults to `0.05` | Split training data for validation |
+| `{ type: "dataset", path: string }` | — | Use a separate validation dataset |
+
+## Returns
+
+The return type depends on the `operation`:
+
+**Run overload** (operation omitted, `"start"`, or `"resume"`):
+
+`FinetuneHandle` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| progressStream | `AsyncGenerator<`[`FinetuneProgress`](#finetuneprogress)`>` | Stream of training progress ticks |
+| result | `Promise<`[`FinetuneResult`](#finetuneresult)`>` | Resolves when the job finishes |
+
+**Reply overload** (`"pause"`, `"cancel"`, or `"getState"`):
+
+`Promise<FinetuneResult>`
+
+### `FinetuneProgress`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| is_train | `boolean` | Whether this tick is from the training phase (vs validation) |
+| loss | `number \| null` | Current loss value |
+| loss_uncertainty | `number \| null` | Loss uncertainty |
+| accuracy | `number \| null` | Current accuracy |
+| accuracy_uncertainty | `number \| null` | Accuracy uncertainty |
+| global_steps | `number` | Total steps completed |
+| current_epoch | `number` | Current epoch index |
+| current_batch | `number` | Current batch index |
+| total_batches | `number` | Total batches in the epoch |
+| elapsed_ms | `number` | Elapsed time in milliseconds |
+| eta_ms | `number` | Estimated time remaining in milliseconds |
+
+### `FinetuneResult`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"finetune"` | Response type discriminator |
+| status | [`FinetuneStatus`](#finetunestatus) | Current job status |
+| stats | [`FinetuneStats`](#finetunestats) `\| undefined` | Final training statistics (present when completed) |
+
+### `FinetuneStatus`
+
+`"IDLE" | "RUNNING" | "PAUSED" | "CANCELLED" | "COMPLETED"`
+
+### `FinetuneStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| train_loss | `number \| undefined` | Final training loss |
+| train_loss_uncertainty | `number \| null \| undefined` | Training loss uncertainty |
+| val_loss | `number \| undefined` | Final validation loss |
+| val_loss_uncertainty | `number \| null \| undefined` | Validation loss uncertainty |
+| train_accuracy | `number \| undefined` | Final training accuracy |
+| train_accuracy_uncertainty | `number \| null \| undefined` | Training accuracy uncertainty |
+| val_accuracy | `number \| undefined` | Final validation accuracy |
+| val_accuracy_uncertainty | `number \| null \| undefined` | Validation accuracy uncertainty |
+| learning_rate | `number \| undefined` | Final learning rate |
+| global_steps | `number` | Total steps completed |
+| epochs_completed | `number` | Total epochs completed |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"finetune"` |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Stream ended without receiving the terminal finetune response |
+
+## Example
+
+```typescript
+const handle = finetune({
+  modelId,
+  options: {
+    trainDatasetDir: "./dataset/train",
+    validation: { type: "split", fraction: 0.05 },
+    outputParametersDir: "./artifacts/lora",
+    numberOfEpochs: 2,
+  },
+});
+
+for await (const progress of handle.progressStream) {
+  console.log(progress.global_steps, progress.loss);
+}
+
+console.log(await handle.result);
+
+// Pause a running job
+const pauseResult = await finetune({ modelId, operation: "pause" });
+console.log(pauseResult.status); // "PAUSED"
+
+// Inspect current state
+const state = await finetune({
+  modelId,
+  operation: "getState",
+  options: {
+    trainDatasetDir: "./dataset/train",
+    validation: { type: "none" },
+    outputParametersDir: "./artifacts/lora",
+  },
+});
+console.log(state.status);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/getLogger.mdx b/docs/website/content/docs/v0.9.0/sdk/api/getLogger.mdx
new file mode 100644
index 0000000000..df388c1dc1
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/getLogger.mdx
@@ -0,0 +1,77 @@
+---
+title: "getLogger( )"
+titleStyle: code
+description: Creates or retrieves a cached logger instance for the given namespace.
+---
+
+```ts
+function getLogger(namespace: string, options?: LoggerOptions): Logger;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| namespace | `string` | ✓ | Logger namespace (used for identification and filtering) |
+| options | [`LoggerOptions`](#loggeroptions) | ✗ | Optional logger configuration. When omitted, the logger is cached by namespace. When provided, a new logger is always created. |
+
+### `LoggerOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| level | `"error" \| "warn" \| "info" \| "debug" \| "trace"` | ✗ | Log level |
+| namespace | `string` | ✗ | Override namespace |
+| transports | [`LogTransport[]`](#logtransport) | ✗ | Custom log transports |
+| enableConsole | `boolean` | ✗ | Whether to output logs to console |
+
+### `LogTransport`
+
+```ts
+type LogTransport = (
+  level: LogLevel,
+  namespace: string,
+  message: string,
+) => void | Promise<void>;
+```
+
+A callback function invoked for each log entry. `LogLevel` is `"error" | "warn" | "info" | "debug" | "trace"`.
+
+## Returns
+
+[`Logger`](#logger) — A logger instance.
+
+### `Logger`
+
+| Method | Signature | Description |
+| --- | --- | --- |
+| error | `(...args: unknown[]) => void` | Log at error level |
+| warn | `(...args: unknown[]) => void` | Log at warn level |
+| info | `(...args: unknown[]) => void` | Log at info level |
+| debug | `(...args: unknown[]) => void` | Log at debug level |
+| trace | `(...args: unknown[]) => void` | Log at trace level |
+| setLevel | `(level: LogLevel) => void` | Change the log level |
+| getLevel | `() => LogLevel` | Get the current log level |
+| addTransport | `(transport:` [`LogTransport`](#logtransport)`) => void` | Add a custom transport |
+| setConsoleOutput | `(enabled: boolean) => void` | Enable or disable console output |
+
+## Example
+
+```typescript
+import { getLogger } from "@qvac/sdk";
+
+const logger = getLogger("my-app");
+
+logger.info("Application started");
+logger.debug("Debug details:", { key: "value" });
+
+logger.setLevel("error");
+logger.info("This will not be logged");
+
+const verboseLogger = getLogger("my-app:verbose", {
+  level: "debug",
+  enableConsole: true,
+  transports: [(level, namespace, message) => {
+    // Custom transport: write to file, send to server, etc.
+  }],
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/getModelByName.mdx b/docs/website/content/docs/v0.9.0/sdk/api/getModelByName.mdx
new file mode 100644
index 0000000000..3971f84a11
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/getModelByName.mdx
@@ -0,0 +1,49 @@
+---
+title: "getModelByName( )"
+titleStyle: code
+description: Retrieves a model constant from the built-in registry by its human-readable name.
+---
+
+```ts
+function getModelByName(name: string): RegistryItem | undefined;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| name | `string` | ✓ | The human-readable model name (e.g., `"Llama 3.2 3B Q4"`) |
+
+## Returns
+
+[`RegistryItem`](#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found.
+
+### `RegistryItem`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| name | `string` | Human-readable model name |
+| registryPath | `string` | Registry path |
+| registrySource | `string` | Registry source |
+| blobCoreKey | `string` | Hyperdrive blob core key |
+| blobBlockOffset | `number` | Blob block offset |
+| blobBlockLength | `number` | Blob block length |
+| blobByteOffset | `number` | Blob byte offset |
+| modelId | `string` | Unique model identifier |
+| addon | `"llm" \| "whisper" \| "embeddings" \| "nmt" \| "vad" \| "tts" \| "ocr" \| "other"` | Model addon type |
+| expectedSize | `number` | Expected file size in bytes |
+| sha256Checksum | `string` | SHA-256 checksum |
+| engine | `string` | Inference engine |
+| quantization | `string` | Quantization level |
+| params | `string` | Model parameter count |
+
+## Example
+
+```typescript
+import { getModelByName } from "@qvac/sdk";
+
+const model = getModelByName("Llama 3.2 3B Q4");
+if (model) {
+  console.log(model.modelId, model.expectedSize);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/getModelByPath.mdx b/docs/website/content/docs/v0.9.0/sdk/api/getModelByPath.mdx
new file mode 100644
index 0000000000..96becb99f6
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/getModelByPath.mdx
@@ -0,0 +1,30 @@
+---
+title: "getModelByPath( )"
+titleStyle: code
+description: Looks up a model in the built-in catalog by its registry path.
+---
+
+```ts
+function getModelByPath(registryPath: string): RegistryItem | undefined;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| registryPath | `string` | ✓ | The full registry path of the model (e.g., `"llama-3.2-1b-instruct-q4_0-gguf"`) |
+
+## Returns
+
+[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found.
+
+## Example
+
+```typescript
+import { getModelByPath } from "@qvac/sdk";
+
+const model = getModelByPath("llama-3.2-1b-instruct-q4_0-gguf");
+if (model) {
+  console.log(model.name, model.expectedSize);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/getModelBySrc.mdx b/docs/website/content/docs/v0.9.0/sdk/api/getModelBySrc.mdx
new file mode 100644
index 0000000000..a109cf46c0
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/getModelBySrc.mdx
@@ -0,0 +1,31 @@
+---
+title: "getModelBySrc( )"
+titleStyle: code
+description: Retrieves a model constant from the built-in registry by its model ID and blob core key.
+---
+
+```ts
+function getModelBySrc(modelId: string, blobCoreKey: string): RegistryItem | undefined;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The unique model identifier |
+| blobCoreKey | `string` | ✓ | The Hyperdrive blob core key |
+
+## Returns
+
+[`RegistryItem`](../getModelByName#registryitem) ` | undefined` — The matching model constant, or `undefined` if not found.
+
+## Example
+
+```typescript
+import { getModelBySrc } from "@qvac/sdk";
+
+const model = getModelBySrc("model-abc123", "blob-core-key-hex");
+if (model) {
+  console.log(model.name, model.expectedSize);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/getModelInfo.mdx b/docs/website/content/docs/v0.9.0/sdk/api/getModelInfo.mdx
new file mode 100644
index 0000000000..3a2b27eaa6
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/getModelInfo.mdx
@@ -0,0 +1,77 @@
+---
+title: "getModelInfo( )"
+titleStyle: code
+description: Retrieves detailed information about a model, including cache status and loaded instances.
+---
+
+```ts
+function getModelInfo(params: GetModelInfoParams): Promise<ModelInfo>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | `object` | ✓ | The query parameters |
+| params.name | `string` | ✓ | The model name to look up |
+
+## Returns
+
+`Promise<`[`ModelInfo`](#modelinfo)`>` — Detailed information about the model.
+
+### `ModelInfo`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| name | `string` | Model name |
+| modelId | `string` | Unique model identifier |
+| addon | `"llm" \| "whisper" \| "embeddings" \| "nmt" \| "vad" \| "tts" \| "ocr" \| "other"` | Model addon type |
+| expectedSize | `number` | Expected total file size in bytes |
+| sha256Checksum | `string` | SHA-256 checksum |
+| isCached | `boolean` | Whether the model is fully cached locally |
+| isLoaded | `boolean` | Whether the model is currently loaded in memory |
+| cacheFiles | [`CacheFileInfo[]`](#cachefileinfo) | Individual cache file details |
+| registryPath | `string` | Registry path (optional) |
+| registrySource | `string` | Registry source (optional) |
+| engine | `string` | Inference engine (optional) |
+| quantization | `string` | Quantization level (optional) |
+| params | `string` | Model parameter count (optional) |
+| actualSize | `number` | Actual cached size in bytes (optional) |
+| cachedAt | `Date` | When the model was cached (optional) |
+| loadedInstances | [`LoadedInstance[]`](#loadedinstance) | Currently loaded instances (optional) |
+
+### `CacheFileInfo`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| filename | `string` | File name |
+| path | `string` | Full file path |
+| expectedSize | `number` | Expected size in bytes |
+| sha256Checksum | `string` | SHA-256 checksum |
+| isCached | `boolean` | Whether this file is cached |
+| actualSize | `number` | Actual file size (optional) |
+| cachedAt | `Date` | When this file was cached (optional) |
+
+### `LoadedInstance`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| registryId | `string` | Registry identifier for this loaded instance |
+| loadedAt | `Date` | When the model was loaded |
+| config | `unknown` | Model configuration used at load time (optional) |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"getModelInfo"` |
+
+## Example
+
+```typescript
+import { getModelInfo } from "@qvac/sdk";
+
+const info = await getModelInfo({ name: "Llama 3.2 3B Q4" });
+console.log(`Cached: ${info.isCached}, Loaded: ${info.isLoaded}`);
+console.log(`Size: ${info.expectedSize} bytes`);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/heartbeat.mdx b/docs/website/content/docs/v0.9.0/sdk/api/heartbeat.mdx
new file mode 100644
index 0000000000..94c4dca47d
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/heartbeat.mdx
@@ -0,0 +1,61 @@
+---
+title: "heartbeat( )"
+titleStyle: code
+description: Checks if a delegated provider or the local SDK worker is responsive.
+---
+
+```ts
+function heartbeat(params?: { delegate?: DelegateBase }): Promise<HeartbeatResponse>;
+```
+
+Sends a heartbeat round-trip to verify that a delegated provider is online or that the local SDK worker is responsive. When called without arguments, checks the local worker.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | `object` | ✗ | Optional delegation target |
+| params.delegate | [`DelegateBase`](#delegatebase) | ✗ | The provider to check — omit to check the local worker |
+
+### `DelegateBase`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| topic | `string` | ✓ | Hyperswarm topic hex string |
+| providerPublicKey | `string` | ✓ | Provider peer public key hex string |
+| timeout | `number` | ✗ | Connection timeout in milliseconds (min 100) |
+| healthCheckTimeout | `number` | ✗ | Health check timeout in milliseconds (min 100) |
+
+## Returns
+
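+`Promise<`[`HeartbeatResponse`](#heartbeatresponse)`>` — Resolves with the heartbeat response once the target (delegated provider or local worker) has answered.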
+
+### `HeartbeatResponse`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"heartbeat"` | Response type discriminator |
+| number | `number` | Round-trip sequence number |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"heartbeat"` |
+
+## Examples
+
+```typescript
+// Check if a delegated provider is online
+try {
+  await heartbeat({
+    delegate: { topic: "topicHex", providerPublicKey: "peerHex", timeout: 3000 },
+  });
+  console.log("Provider is online");
+} catch {
+  console.log("Provider is offline");
+}
+
+// Check if the local SDK worker is responsive
+await heartbeat();
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/index.mdx b/docs/website/content/docs/v0.9.0/sdk/api/index.mdx
new file mode 100644
index 0000000000..1a6e89d900
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/index.mdx
@@ -0,0 +1,75 @@
+---
+title: "@qvac/sdk"
+titleStyle: code
+description: API reference — v0.9.0
+---
+
+## Overview
+
+The `@qvac/sdk` npm package exposes a function-centric, typed JS API.
+
+## Functions
+
+| Function | Summary |
+| --- | --- |
+| [`cancel()`](./cancel) | Cancels an ongoing operation. |
+| [`close()`](./close) | Closes the SDK client connection and releases all associated resources. |
+| [`completion()`](./completion) | Generates completion from a language model based on conversation history. |
+| [`defineDuplexHandler()`](./defineDuplexHandler) | Helper function to define a duplex (bidirectional streaming) handler with full type inference. |
+| [`defineHandler()`](./defineHandler) | Helper function to define a handler with full type inference. |
+| [`definePlugin()`](./definePlugin) | Helper function to define a plugin with full type inference. |
+| [`deleteCache()`](./deleteCache) | Deletes KV cache files. |
+| [`diffusion()`](./diffusion) | Generates images using a loaded diffusion model. |
+| [`downloadAsset()`](./downloadAsset) | Downloads an asset (model file) without loading it into memory. |
+| [`embed()`](./embed) | Generates embeddings for a single text using a specified model. |
+| [`finetune()`](./finetune) | Starts, resumes, inspects, pauses, or cancels a finetuning job. |
+| [`getLogger()`](./getLogger) | Creates or retrieves a namespaced logger instance. |
+| [`getModelByName()`](./getModelByName) | Looks up a model in the built-in catalog by its constant name. |
+| [`getModelByPath()`](./getModelByPath) | Looks up a model in the built-in catalog by its registry path. |
+| [`getModelBySrc()`](./getModelBySrc) | Looks up a model in the built-in catalog by model file ID and blob core key. |
+| [`getModelInfo()`](./getModelInfo) | Returns status information for a catalog model, including cache state and loaded instances. |
+| [`heartbeat()`](./heartbeat) | Checks if a delegated provider or the local SDK worker is responsive. |
+| [`invokePlugin()`](./invokePlugin) | Invoke a non-streaming plugin handler. |
+| [`invokePluginStream()`](./invokePluginStream) | Invoke a streaming plugin handler. |
+| [`loadModel()`](./loadModel) | Loads a machine learning model from a local path, remote URL, or Hyperdrive key. |
+| [`loggingStream()`](./loggingStream) | Opens a logging stream to receive real-time logs. |
+| [`modelRegistryGetModel()`](./modelRegistryGetModel) | Fetches a single model entry from the registry by its path and source. |
+| [`modelRegistryList()`](./modelRegistryList) | Returns all available models from the QVAC distributed model registry. |
+| [`modelRegistrySearch()`](./modelRegistrySearch) | Searches the model registry with optional filters for model type, engine, and quantization. |
+| [`ocr()`](./ocr) | Performs Optical Character Recognition (OCR) on an image to extract text. |
+| [`ragChunk()`](./ragChunk) | Chunks documents into smaller pieces for embedding. |
+| [`ragCloseWorkspace()`](./ragCloseWorkspace) | Closes a RAG workspace, releasing in-memory resources (Corestore, HyperDB adapter, RAG instance). |
+| [`ragDeleteEmbeddings()`](./ragDeleteEmbeddings) | Deletes document embeddings from the RAG vector database. |
+| [`ragDeleteWorkspace()`](./ragDeleteWorkspace) | Deletes a RAG workspace and all its data. |
+| [`ragIngest()`](./ragIngest) | Ingests documents into the RAG vector database. |
+| [`ragListWorkspaces()`](./ragListWorkspaces) | Lists all RAG workspaces with their open status. |
+| [`ragReindex()`](./ragReindex) | Reindexes the RAG database to optimize search performance. |
+| [`ragSaveEmbeddings()`](./ragSaveEmbeddings) | Saves pre-embedded documents to the RAG vector database. |
+| [`ragSearch()`](./ragSearch) | Searches for similar documents in the RAG vector database. |
+| [`resume()`](./resume) | Resumes all suspended Hyperswarm and Corestore resources. |
+| [`startQVACProvider()`](./startQVACProvider) | Starts a provider service that offers QVAC capabilities to remote peers. |
+| [`stopQVACProvider()`](./stopQVACProvider) | Stops a running provider service and leaves the specified topic. |
+| [`suspend()`](./suspend) | Suspends all active Hyperswarm and Corestore resources. |
+| [`textToSpeech()`](./textToSpeech) | Converts text to speech audio using a loaded TTS model. |
+| [`transcribe()`](./transcribe) | Transcribes audio and collects all streaming results into a single string response. |
+| [`transcribeStream()`](./transcribeStream) | Streams audio transcription results in real-time, yielding text chunks as they become available. |
+| [`translate()`](./translate) | Translates text from one language to another using a specified translation model. |
+| [`unloadModel()`](./unloadModel) | Unloads a previously loaded model from the server. |
+
+## Object
+
+| Object | Summary |
+| --- | --- |
+| [`profiler`](./profiler) | Singleton object that collects and exports profiling data for SDK operations. |
+
+## Shared types
+
+<Card href="./shared-types" title="Shared types">
+  Common type definitions used across SDK functions.
+</Card>
+
+## Errors
+
+<Card href="./errors" title="Error codes">
+  All errors thrown by the SDK and how to handle them via `SDK_CLIENT_ERROR_CODES` and `SDK_SERVER_ERROR_CODES`.
+</Card>
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/invokePlugin.mdx b/docs/website/content/docs/v0.9.0/sdk/api/invokePlugin.mdx
new file mode 100644
index 0000000000..d2acb8c24d
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/invokePlugin.mdx
@@ -0,0 +1,48 @@
+---
+title: "invokePlugin( )"
+titleStyle: code
+description: Invoke a non-streaming plugin handler.
+---
+
+```ts
+function invokePlugin<TResponse = unknown, TParams = unknown>(
+  options: InvokePluginOptions<TParams>,
+  rpcOptions?: RPCOptions
+): Promise<TResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`InvokePluginOptions`](#invokepluginoptions) | ✓ | The invocation options |
+| rpcOptions | [`RPCOptions`](../shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `InvokePluginOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The model ID of the loaded plugin model |
+| handler | `string` | ✓ | The handler name to invoke (as defined in the plugin) |
+| params | `TParams` | ✓ | Parameters to pass to the handler (validated against the handler's `requestSchema`) |
+
+## Returns
+
+`Promise<TResponse>` — The handler's response (validated against the handler's `responseSchema`).
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"pluginInvoke"` |
+
+## Example
+
+```typescript
+const result = await invokePlugin<{ answer: string }>({
+  modelId: "my-custom-model",
+  handler: "summarize",
+  params: { text: "Long document..." },
+});
+console.log(result.answer);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/invokePluginStream.mdx b/docs/website/content/docs/v0.9.0/sdk/api/invokePluginStream.mdx
new file mode 100644
index 0000000000..27b4bfd7f3
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/invokePluginStream.mdx
@@ -0,0 +1,43 @@
+---
+title: "invokePluginStream( )"
+titleStyle: code
+description: Invoke a streaming plugin handler.
+---
+
+```ts
+function invokePluginStream<TResponse = unknown, TParams = unknown>(
+  options: InvokePluginOptions<TParams>,
+  rpcOptions?: RPCOptions
+): AsyncGenerator<TResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`InvokePluginOptions`](../invokePlugin#invokepluginoptions) | ✓ | The invocation options (same as [`invokePlugin()`](../invokePlugin)) |
+| rpcOptions | [`RPCOptions`](../shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+## Returns
+
+`AsyncGenerator<TResponse>` — Yields streamed chunks from the handler until `done` is received.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | A chunk's type does not match expected `"pluginInvokeStream"` |
+
+## Example
+
+```typescript
+const stream = invokePluginStream<{ token: string }>({
+  modelId: "my-custom-model",
+  handler: "generateStream",
+  params: { prompt: "Tell me a story" },
+});
+
+for await (const chunk of stream) {
+  process.stdout.write(chunk.token);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/loadModel.mdx b/docs/website/content/docs/v0.9.0/sdk/api/loadModel.mdx
new file mode 100644
index 0000000000..246dc0bbc9
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/loadModel.mdx
@@ -0,0 +1,359 @@
+---
+title: "loadModel( )"
+titleStyle: code
+description: Loads a machine learning model from a local path, remote URL, or Hyperdrive key.
+---
+
+```ts
+// Load new model
+function loadModel(options: LoadModelOptions, rpcOptions?: RPCOptions): Promise<string>;
+
+// Hot-reload config on an already-loaded model
+function loadModel(options: ReloadConfigOptions, rpcOptions?: RPCOptions): Promise<string>;
+```
+
+Supports multiple model types: LLM, Whisper (speech recognition), Parakeet (NVIDIA NeMo transcription), embeddings, NMT (translation), TTS, and OCR. Handles local file paths, HTTP/HTTPS URLs, Hyperdrive URLs (`pear://`), and registry URLs.
+
+When `onProgress` is provided, streaming is used for real-time download progress. Otherwise, a simple request-response pattern is used.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`LoadModelOptions`](#loadmodeloptions) ` \| ` [`ReloadConfigOptions`](#reloadconfigoptions) | ✓ | Configuration for loading or hot-reloading a model |
+| rpcOptions | [`RPCOptions`](../shared-types#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `LoadModelOptions`
+
+Common fields present in all variants:
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| modelSrc | `string \| ModelDescriptor` | ✓ | — | Model source — local path, HTTP(S) URL, Hyperdrive `pear://` URL, registry URL, or a model constant object |
+| modelType | `string` | ✓ | — | The type of model — see [model type variants](#model-type-variants) |
+| modelConfig | `object` | ✗ | `{}` | Model-specific configuration (varies by `modelType`) |
+| seed | `boolean` | ✗ | `false` | Whether to seed the model on Hyperdrive after download |
+| delegate | [`Delegate`](#delegate) | ✗ | — | Delegation configuration for remote inference |
+| onProgress | `(progress: ModelProgressUpdate) => void` | ✗ | — | Callback for real-time download progress |
+| logger | `Logger` | ✗ | — | Logger instance — model operation logs are forwarded to this logger |
+
+### `Delegate`
+
+Optional delegation configuration for remote (P2P) inference:
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| topic | `string` | ✓ | — | P2P topic for delegation |
+| providerPublicKey | `string` | ✓ | — | Provider's public key |
+| timeout | `number` | ✗ | — | Timeout in milliseconds (min 100) |
+| fallbackToLocal | `boolean` | ✗ | `false` | Whether to fall back to local inference if delegation fails |
+| forceNewConnection | `boolean` | ✗ | `false` | Force a new connection to the provider |
+
+### `ReloadConfigOptions`
+
+Hot-reload configuration on an already-loaded model without reloading the model weights. Currently supported for Whisper models only.
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelId | `string` | ✓ | The ID of an existing loaded model (16-char hex) |
+| modelType | `string` | ✓ | The type of model (must match the loaded model) |
+| modelConfig | `object` | ✓ | New configuration to apply |
+
+### Model type variants
+
+The `modelType` field determines which variant of `modelConfig` is accepted.
+
+#### `"llm"`
+
+All LLM-specific fields live inside `modelConfig`. See [LLM `modelConfig`](#llm-modelconfig) for the full reference.
+
+#### `"whisper"`
+
+All Whisper-specific fields live inside `modelConfig`. See [Whisper `modelConfig`](#whisper-modelconfig) for the full reference.
+
+#### `"parakeet"`
+
+NVIDIA NeMo Parakeet models for speech recognition. `modelConfig` is **required**.
+
+See [Parakeet `modelConfig`](#parakeet-modelconfig) for the full reference.
+
+#### `"embeddings"`
+
+All embeddings fields live inside `modelConfig`. See [Embeddings `modelConfig`](#embeddings-modelconfig) for the full reference.
+
+#### `"nmt"`
+
+`modelConfig` is **required** and is a discriminated union on `engine`. See [NMT `modelConfig`](#nmt-modelconfig) for the full reference.
+
+#### `"tts"`
+
+`modelConfig` is **required** and is a discriminated union on `ttsEngine`:
+
+**Chatterbox engine** (`ttsEngine: "chatterbox"`):
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| ttsEngine | `"chatterbox"` | ✓ | Engine discriminator |
+| language | `"en" \| "es" \| "de" \| "it"` | ✓ | Output language |
+| ttsTokenizerSrc | `string \| ModelDescriptor` | ✓ | Tokenizer model source |
+| ttsSpeechEncoderSrc | `string \| ModelDescriptor` | ✓ | Speech encoder model source |
+| ttsEmbedTokensSrc | `string \| ModelDescriptor` | ✓ | Embed tokens model source |
+| ttsConditionalDecoderSrc | `string \| ModelDescriptor` | ✓ | Conditional decoder model source |
+| ttsLanguageModelSrc | `string \| ModelDescriptor` | ✓ | Language model source |
+| referenceAudioSrc | `string \| ModelDescriptor` | ✓ | Reference WAV file for voice cloning |
+
+**Supertonic engine** (`ttsEngine: "supertonic"`):
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| ttsEngine | `"supertonic"` | ✓ | Engine discriminator |
+| language | `"en" \| "es" \| "de" \| "it"` | ✓ | Output language |
+| ttsTokenizerSrc | `string \| ModelDescriptor` | ✓ | Tokenizer model source |
+| ttsTextEncoderSrc | `string \| ModelDescriptor` | ✓ | Text encoder model source |
+| ttsLatentDenoiserSrc | `string \| ModelDescriptor` | ✓ | Latent denoiser model source |
+| ttsVoiceDecoderSrc | `string \| ModelDescriptor` | ✓ | Voice decoder model source |
+| ttsVoiceSrc | `string \| ModelDescriptor` | ✓ | Voice `.bin` file source |
+| ttsSpeed | `number` | ✗ | Speech speed multiplier |
+| ttsNumInferenceSteps | `number` | ✗ | Number of inference steps |
+
+#### `"ocr"`
+
+All OCR-specific fields live inside `modelConfig`. See [OCR `modelConfig`](#ocr-modelconfig) for the full reference.
+
+### Custom plugin
+
+Any `modelType` string that is not a built-in type. `modelConfig` accepts `Record<string, unknown>`.
+
+### `modelConfig` reference
+
+#### LLM `modelConfig`
+
+| Field | Type | Default | Description |
+| --- | --- | --- | --- |
+| ctx_size | `number` | `1024` | Context window size |
+| device | `string` | `"gpu"` | Device to use |
+| gpu_layers | `number` | `99` | Number of layers offloaded to GPU |
+| system_prompt | `string` | `"You are a helpful assistant."` | System prompt |
+| temp | `number` | — | Temperature (0–2) |
+| top_p | `number` | — | Top-p sampling (0–1) |
+| top_k | `number` | — | Top-k sampling (0–128) |
+| seed | `number` | — | Random seed |
+| predict | `number` | — | Max tokens to predict. `-1` = until stop token, `-2` = until context filled |
+| lora | `string` | — | LoRA adapter path |
+| no_mmap | `boolean` | — | Disable memory-mapped I/O |
+| verbosity | `0 \| 1 \| 2 \| 3` | — | Engine verbosity — use exported `VERBOSITY` constant |
+| presence_penalty | `number` | — | Presence penalty |
+| frequency_penalty | `number` | — | Frequency penalty |
+| repeat_penalty | `number` | — | Repeat penalty |
+| stop_sequences | `string[]` | — | Custom stop sequences |
+| n_discarded | `number` | — | Number of discarded tokens |
+| tools | `boolean` | — | Enable tool calling support |
+| projectionModelSrc | `string \| ModelDescriptor` | — | Projection model source for multimodal models |
+
+#### Whisper `modelConfig`
+
+Common fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| language | `string` | Language code (e.g., `"en"`) |
+| translate | `boolean` | Whether to translate to English |
+| strategy | `"greedy" \| "beam_search"` | Sampling strategy |
+| temperature | `number` | Temperature |
+| initial_prompt | `string` | Initial prompt for the decoder |
+| detect_language | `boolean` | Auto-detect language |
+| vad_params | `object` | VAD parameters — `{ threshold?, min_speech_duration_ms?, min_silence_duration_ms?, max_speech_duration_s?, speech_pad_ms?, samples_overlap? }` |
+| audio_format | `"f32le" \| "s16le"` | Audio format |
+| contextParams | `object` | Context parameters — `{ model?, use_gpu?, flash_attn?, gpu_device? }` |
+| miscConfig | `object` | Miscellaneous config — `{ caption_enabled? }` |
+| vadModelSrc | `string \| ModelDescriptor` | VAD model source for voice activity detection |
+
+Additional fields: `n_threads`, `n_max_text_ctx`, `offset_ms`, `duration_ms`, `audio_ctx`, `no_context`, `no_timestamps`, `single_segment`, `print_special`, `print_progress`, `print_realtime`, `print_timestamps`, `token_timestamps`, `thold_pt`, `thold_ptsum`, `max_len`, `split_on_word`, `max_tokens`, `debug_mode`, `tdrz_enable`, `suppress_regex`, `suppress_blank`, `suppress_nst`, `length_penalty`, `temperature_inc`, `entropy_thold`, `logprob_thold`, `greedy_best_of`, `beam_search_beam_size`. All optional. See `whisperConfigSchema` in the source for details.
+
+#### Parakeet `modelConfig`
+
+`modelConfig` is **required**. Parakeet models support three variants, selected via the `modelType` field inside `modelConfig` (distinct from the top-level `modelType: "parakeet"`): `"tdt"` (default), `"ctc"`, and `"sortformer"`.
+
+**Runtime config:**
+
+| Field | Type | Default | Description |
+| --- | --- | --- | --- |
+| modelType | `"tdt" \| "ctc" \| "sortformer"` | `"tdt"` | Parakeet model variant |
+| maxThreads | `number` | — | Maximum inference threads |
+| useGPU | `boolean` | — | Use GPU acceleration |
+| sampleRate | `number` | — | Audio sample rate |
+| channels | `number` | — | Audio channels |
+| captionEnabled | `boolean` | — | Enable caption mode |
+| timestampsEnabled | `boolean` | — | Enable timestamps in output |
+
+**Model sources (all `string \| ModelDescriptor`, all optional):**
+
+| Field | Description |
+| --- | --- |
+| parakeetEncoderSrc | TDT encoder model source |
+| parakeetEncoderDataSrc | TDT encoder data source |
+| parakeetDecoderSrc | TDT decoder model source |
+| parakeetVocabSrc | TDT vocabulary source |
+| parakeetPreprocessorSrc | TDT preprocessor source |
+| parakeetCtcModelSrc | CTC model source |
+| parakeetCtcModelDataSrc | CTC model data source |
+| parakeetTokenizerSrc | CTC tokenizer source |
+| parakeetSortformerSrc | Sortformer model source |
+
+#### Embeddings `modelConfig`
+
+| Field | Type | Default | Description |
+| --- | --- | --- | --- |
+| gpuLayers | `number` | `99` | Number of layers offloaded to GPU |
+| device | `"gpu" \| "cpu"` | `"gpu"` | Device to use |
+| batchSize | `number` | `1024` | Embedding batch size |
+| pooling | `"none" \| "mean" \| "cls" \| "last" \| "rank"` | — | Pooling strategy |
+| attention | `"causal" \| "non-causal"` | — | Attention type |
+| embdNormalize | `number` | — | Embedding normalization (integer) |
+| flashAttention | `"on" \| "off" \| "auto"` | — | Flash attention toggle |
+| mainGpu | `number \| "integrated" \| "dedicated"` | — | GPU device selection |
+| verbosity | `0 \| 1 \| 2 \| 3` | — | Engine verbosity — use exported `VERBOSITY` constant |
+
+#### NMT `modelConfig`
+
+Discriminated union on `engine`. Common generation parameters (all optional):
+
+| Field | Type | Default | Description |
+| --- | --- | --- | --- |
+| mode | `"full"` | `"full"` | Translation mode |
+| beamsize | `number` | `4` | Beam size |
+| lengthpenalty | `number` | `1.0` | Length penalty |
+| maxlength | `number` | `512` | Max output length |
+| repetitionpenalty | `number` | `1.0` | Repetition penalty |
+| norepeatngramsize | `number` | `0` | No-repeat n-gram size |
+| temperature | `number` | `0.3` | Temperature |
+| topk | `number` | `0` | Top-k sampling |
+| topp | `number` | `1.0` | Top-p sampling |
+
+Engine-specific:
+
+- **Opus**: _Deprecated in v1.0.0. Use Bergamot for European language pairs._
+- **Bergamot**: `from`/`to` accept 25 languages (en, ar, bg, ca, cs, de, es, et, fi, fr, hu, is, it, ja, ko, lt, lv, nl, pl, pt, ru, sk, sl, uk, zh). Additional fields: `srcVocabSrc`, `dstVocabSrc`, `normalize`, `pivotModel`
+- **IndicTrans**: `from`/`to` accept 26 Indic language codes (e.g., `"eng_Latn"`, `"hin_Deva"`)
+
+**Bergamot `pivotModel`** (optional) — for translation via an intermediate language:
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| modelSrc | `string \| ModelDescriptor` | ✓ | Pivot model source |
+| srcVocabSrc | `string \| ModelDescriptor` | ✗ | Source vocabulary file |
+| dstVocabSrc | `string \| ModelDescriptor` | ✗ | Destination vocabulary file |
+| normalize | `number` | ✗ | Normalization factor |
+
+Plus all common generation parameters above.
+
+#### OCR `modelConfig`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| langList | `string[]` | Languages to detect |
+| useGPU | `boolean` | Use GPU acceleration |
+| timeout | `number` | Timeout in milliseconds |
+| pipelineMode | `"easyocr" \| "doctr"` | OCR pipeline mode |
+| magRatio | `number` | Magnification ratio for detection |
+| defaultRotationAngles | `number[]` | Rotation angles to try |
+| contrastRetry | `boolean` | Retry with contrast adjustment |
+| lowConfidenceThreshold | `number` | Threshold for low-confidence filtering |
+| recognizerBatchSize | `number` | Batch size for recognizer |
+| decodingMethod | `"ctc" \| "attention"` | Decoding method |
+| straightenPages | `boolean` | Straighten pages before recognition |
+| detectorModelSrc | `string \| ModelDescriptor` | Detector model source |
+
+### `ModelProgressUpdate`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"modelProgress"` | Event type |
+| downloaded | `number` | Bytes downloaded so far |
+| total | `number` | Total bytes expected |
+| percentage | `number` | Download percentage |
+| downloadKey | `string` | Unique download key (use with [`cancel()`](../cancel)) |
+| shardInfo | `object` | Shard progress (optional, for sharded models) |
+| shardInfo.currentShard | `number` | Current shard index |
+| shardInfo.totalShards | `number` | Total number of shards |
+| shardInfo.shardName | `string` | Current shard file name |
+| shardInfo.overallDownloaded | `number` | Total bytes downloaded across all shards |
+| shardInfo.overallTotal | `number` | Total bytes across all shards |
+| shardInfo.overallPercentage | `number` | Overall percentage across all shards |
+| onnxInfo | `object` | ONNX multi-file progress (optional, for ONNX models) |
+| onnxInfo.currentFile | `string` | Current file being downloaded |
+| onnxInfo.fileIndex | `number` | Current file index |
+| onnxInfo.totalFiles | `number` | Total number of files |
+| onnxInfo.overallDownloaded | `number` | Total bytes downloaded across all files |
+| onnxInfo.overallTotal | `number` | Total bytes across all files |
+| onnxInfo.overallPercentage | `number` | Overall percentage across all files |
+
+## Returns
+
+`Promise<string>` — Resolves to the model ID (used to reference the model in subsequent API calls).
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `MODEL_LOAD_FAILED` | Model loading fails |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Streaming ends without a final response (when using `onProgress`) |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"loadModel"` |
+
+## Example
+
+```typescript
+// Local file path
+const modelId = await loadModel({
+  modelSrc: "/home/user/models/llama-7b.gguf",
+  modelType: "llm",
+  modelConfig: { ctx_size: 2048 }
+});
+
+// Remote URL with progress tracking
+const modelId = await loadModel({
+  modelSrc: "https://huggingface.co/.../model.gguf",
+  modelType: "llm",
+  onProgress: (progress) => {
+    console.log(`Downloaded: ${progress.percentage}%`);
+  }
+});
+
+// Hyperdrive URL
+const modelId = await loadModel({
+  modelSrc: "pear://<hyperdrive-key>/llama-7b.gguf",
+  modelType: "llm",
+  modelConfig: { ctx_size: 2048 }
+});
+
+// Multimodal model with projection
+const modelId = await loadModel({
+  modelSrc: "https://huggingface.co/.../main-model.gguf",
+  modelType: "llm",
+  modelConfig: {
+    ctx_size: 512,
+    projectionModelSrc: "https://huggingface.co/.../projection-model.gguf"
+  }
+});
+
+// Whisper with VAD model
+const modelId = await loadModel({
+  modelSrc: "https://huggingface.co/.../whisper-model.gguf",
+  modelType: "whisper",
+  modelConfig: {
+    language: "en",
+    strategy: "greedy",
+    vadModelSrc: "https://huggingface.co/.../vad-model.bin"
+  }
+});
+
+// With logger forwarding
+import { getLogger } from "@qvac/sdk";
+const logger = getLogger("my-app");
+
+const modelId = await loadModel({
+  modelSrc: "/path/to/model.gguf",
+  modelType: "llm",
+  logger
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/loggingStream.mdx b/docs/website/content/docs/v0.9.0/sdk/api/loggingStream.mdx
new file mode 100644
index 0000000000..ac516288cf
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/loggingStream.mdx
@@ -0,0 +1,53 @@
+---
+title: "loggingStream( )"
+titleStyle: code
+description: Opens a logging stream to receive real-time logs.
+---
+
+```ts
+function loggingStream(params: LoggingParams): AsyncGenerator<LoggingStreamResponse>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | `object` | ✓ | The logging stream parameters |
+| params.id | `string` | ✓ | The identifier to stream logs for. Pass a model ID for model logs, or the exported constant `SDK_LOG_ID` for SDK server logs. |
+
+## Returns
+
+`AsyncGenerator<`[`LoggingStreamResponse`](#loggingstreamresponse)`>` — Yields log messages in real time.
+
+### `LoggingStreamResponse`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| type | `"loggingStream"` | Response type |
+| id | `string` | Identifier being streamed |
+| level | `"error" \| "warn" \| "info" \| "debug"` | Log level |
+| namespace | `string` | Logger namespace |
+| message | `string` | Log message |
+| timestamp | `number` | Unix timestamp |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | A chunk's type does not match expected `"loggingStream"` |
+
+## Example
+
+```typescript
+import { loggingStream, SDK_LOG_ID } from "@qvac/sdk";
+
+// Stream logs from a loaded model
+const logStream = loggingStream({ id: "my-model-id" });
+
+for await (const logMessage of logStream) {
+  console.log(`[${logMessage.level}] ${logMessage.namespace}: ${logMessage.message}`);
+}
+
+// Or stream SDK server logs
+const sdkLogs = loggingStream({ id: SDK_LOG_ID });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryGetModel.mdx b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryGetModel.mdx
new file mode 100644
index 0000000000..06d395677c
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryGetModel.mdx
@@ -0,0 +1,52 @@
+---
+title: "modelRegistryGetModel( )"
+titleStyle: code
+description: Retrieves a single model entry from the QVAC model registry by path and source.
+---
+
+```ts
+function modelRegistryGetModel(registryPath: string, registrySource: string): Promise<ModelRegistryEntry>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| registryPath | `string` | ✓ | The registry path of the model |
+| registrySource | `string` | ✓ | The registry source identifier |
+
+## Returns
+
+`Promise<`[`ModelRegistryEntry`](#modelregistryentry)`>` — The matching model entry.
+
+### `ModelRegistryEntry`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| name | `string` | Human-readable model name |
+| registryPath | `string` | Registry path |
+| registrySource | `string` | Registry source |
+| blobCoreKey | `string` | Hyperdrive blob core key |
+| blobBlockOffset | `number` | Blob block offset |
+| blobBlockLength | `number` | Blob block length |
+| blobByteOffset | `number` | Blob byte offset |
+| modelId | `string` | Unique model identifier |
+| addon | `"llm" \| "whisper" \| "embeddings" \| "nmt" \| "vad" \| "tts" \| "ocr" \| "other"` | Model addon type |
+| expectedSize | `number` | Expected file size in bytes |
+| sha256Checksum | `string` | SHA-256 checksum |
+| engine | `string` | Inference engine |
+| quantization | `string` | Quantization level |
+| params | `string` | Parameter count |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `QVAC_MODEL_REGISTRY_QUERY_FAILED` | The registry query fails or the model is not found |
+
+## Example
+
+```typescript
+const model = await modelRegistryGetModel("llama-3.2-3b-q4", "qvac");
+console.log(model.name, model.expectedSize);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryList.mdx b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryList.mdx
new file mode 100644
index 0000000000..ca397c736e
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistryList.mdx
@@ -0,0 +1,30 @@
+---
+title: "modelRegistryList( )"
+titleStyle: code
+description: Returns all available models from the QVAC distributed model registry.
+---
+
+```ts
+function modelRegistryList(): Promise<ModelRegistryEntry[]>;
+```
+
+## Returns
+
+`Promise<`[`ModelRegistryEntry[]`](../modelRegistryGetModel#modelregistryentry)`>` — Array of all models in the registry.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `QVAC_MODEL_REGISTRY_QUERY_FAILED` | The registry query fails |
+
+## Example
+
+```typescript
+import { modelRegistryList } from "@qvac/sdk";
+
+const models = await modelRegistryList();
+for (const model of models) {
+  console.log(model.registryPath, model.addon);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/modelRegistrySearch.mdx b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistrySearch.mdx
new file mode 100644
index 0000000000..bedf95f26f
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/modelRegistrySearch.mdx
@@ -0,0 +1,51 @@
+---
+title: "modelRegistrySearch( )"
+titleStyle: code
+description: Searches the QVAC model registry with optional filters.
+---
+
+```ts
+function modelRegistrySearch(params?: ModelRegistrySearchParams): Promise<ModelRegistryEntry[]>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`ModelRegistrySearchParams`](#modelregistrysearchparams) | ✗ | Optional search filters. If omitted, returns all models. |
+
+### `ModelRegistrySearchParams`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| filter | `string` | ✗ | Free-text filter on model name |
+| engine | `string` | ✗ | Filter by inference engine |
+| quantization | `string` | ✗ | Filter by quantization level |
+| modelType | `"llm" \| "whisper" \| "embeddings" \| "nmt" \| "vad" \| "tts" \| "ocr" \| "other"` | ✗ | Filter by model type (alias for `addon`) |
+| addon | `"llm" \| "whisper" \| "embeddings" \| "nmt" \| "vad" \| "tts" \| "ocr" \| "other"` | ✗ | Filter by addon type |
+
+## Returns
+
+`Promise<`[`ModelRegistryEntry[]`](../modelRegistryGetModel#modelregistryentry)`>` — Matching model entries.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `QVAC_MODEL_REGISTRY_QUERY_FAILED` | The registry query fails |
+
+## Example
+
+```typescript
+// Search LLM models
+const llmModels = await modelRegistrySearch({ modelType: "llm" });
+
+// Search by name
+const llamaModels = await modelRegistrySearch({ filter: "llama" });
+
+// Combined filters
+const models = await modelRegistrySearch({
+  modelType: "llm",
+  quantization: "Q4_K_M",
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ocr.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ocr.mdx
new file mode 100644
index 0000000000..8359caf706
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ocr.mdx
@@ -0,0 +1,76 @@
+---
+title: "ocr( )"
+titleStyle: code
+description: Performs Optical Character Recognition (OCR) on an image to extract text.
+---
+
+```ts
+function ocr(params: OCRClientParams): {
+  blockStream: AsyncGenerator<OCRTextBlock[]>;
+  blocks: Promise<OCRTextBlock[]>;
+  stats: Promise<OCRStats | undefined>;
+};
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | [`OCRClientParams`](#ocrclientparams) | ✓ | The OCR parameters |
+
+### `OCRClientParams`
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| modelId | `string` | ✓ | — | The identifier of the loaded OCR model |
+| image | `string \| Buffer` | ✓ | — | Image input as file path (string) or image buffer |
+| options | [`OCROptions`](#ocroptions) | ✗ | — | Optional OCR options |
+| stream | `boolean` | ✗ | `false` | Whether to stream blocks as they're detected |
+
+#### `OCROptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| paragraph | `boolean` | ✗ | Enable paragraph mode |
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| blockStream | `AsyncGenerator<`[`OCRTextBlock[]`](#ocrtextblock)`>` | Stream of detected text blocks (active when `stream: true`) |
+| blocks | `Promise<`[`OCRTextBlock[]`](#ocrtextblock)`>` | All detected text blocks (populated when `stream: false`) |
+| stats | `Promise<`[`OCRStats`](#ocrstats) `\| undefined>` | Performance statistics |
+
+### `OCRTextBlock`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| text | `string` | Detected text |
+| bbox | `[number, number, number, number]` | Bounding box `[x1, y1, x2, y2]` (optional) |
+| confidence | `number` | Detection confidence score (optional) |
+
+### `OCRStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| detectionTime | `number` | Detection phase time in ms (optional) |
+| recognitionTime | `number` | Recognition phase time in ms (optional) |
+| totalTime | `number` | Total OCR time in ms (optional) |
+
+## Example
+
+```typescript
+// Non-streaming mode (default)
+const { blocks } = ocr({ modelId, image: "/path/to/image.png" });
+for (const block of await blocks) {
+  console.log(block.text, block.bbox, block.confidence);
+}
+
+// Streaming mode
+const { blockStream } = ocr({ modelId, image: imageBuffer, stream: true });
+for await (const blocks of blockStream) {
+  console.log("Detected:", blocks);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/profiler.mdx b/docs/website/content/docs/v0.9.0/sdk/api/profiler.mdx
new file mode 100644
index 0000000000..2e72a80749
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/profiler.mdx
@@ -0,0 +1,296 @@
+---
+title: "profiler"
+titleStyle: code
+description: Singleton object that collects and exports profiling data for SDK operations.
+---
+
+```ts
+const profiler: {
+  enable(options?: ProfilerRuntimeOptions): void;
+  disable(): void;
+  isEnabled(): boolean;
+  exportJSON(options?: { includeRecentEvents?: boolean }): ProfilerExport;
+  exportTable(): string;
+  exportSummary(): string;
+  onRecord(callback: (event: ProfilingEvent) => void): () => void;
+  getConfig(): ResolvedProfilerConfig;
+  getAggregates(): Record<string, AggregatedStats>;
+  clear(): void;
+};
+```
+
+## Methods
+
+| Method | Description |
+| --- | --- |
+| [`enable()`](#enable) | Enables profiling and resets aggregated data |
+| [`disable()`](#disable) | Disables profiling |
+| [`isEnabled()`](#isenabled) | Returns whether profiling is currently enabled |
+| [`exportJSON()`](#exportjson) | Exports profiling data as a structured JSON object |
+| [`exportTable()`](#exporttable) | Exports aggregated stats as a formatted ASCII table |
+| [`exportSummary()`](#exportsummary) | Exports a human-readable summary string |
+| [`onRecord()`](#onrecord) | Registers a listener for profiling events; returns an unsubscribe function |
+| [`getConfig()`](#getconfig) | Returns the current effective profiler configuration |
+| [`getAggregates()`](#getaggregates) | Returns all aggregated stats keyed by operation name |
+| [`clear()`](#clear) | Clears all aggregated data and recent events |
+
+### `enable()`
+
+```ts
+function enable(options?: ProfilerRuntimeOptions): void
+```
+
+Enables profiling and resets all previously aggregated data.
+
+**Parameters**
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`ProfilerRuntimeOptions`](#profilerruntimeoptions) | ✗ | Runtime profiler options |
+
+#### `ProfilerRuntimeOptions`
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| mode | `"summary" \| "verbose"` | ✗ | `"summary"` | Profiling detail level — `"verbose"` retains recent events |
+| includeServerBreakdown | `boolean` | ✗ | `false` | Include server-side timing breakdown in profiling data |
+| operationFilters | `string[]` | ✗ | `[]` | Only profile operations whose names match these filters (empty = all) |
+
+**Returns**
+
+`void`
+
+### `disable()`
+
+```ts
+function disable(): void
+```
+
+Disables profiling. New SDK operations will no longer be recorded.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+`void`
+
+### `isEnabled()`
+
+```ts
+function isEnabled(): boolean
+```
+
+Returns whether profiling is currently enabled.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+`boolean` — `true` if profiling is enabled, `false` otherwise.
+
+### `exportJSON()`
+
+```ts
+function exportJSON(options?: { includeRecentEvents?: boolean }): ProfilerExport
+```
+
+Exports profiling data as a structured JSON object.
+
+**Parameters**
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | `object` | ✗ | Export options |
+| options.includeRecentEvents | `boolean` | ✗ | Include recent events in the export (only available in `"verbose"` mode) |
+
+**Returns**
+
+[`ProfilerExport`](#profilerexport) — structured JSON object containing configuration snapshot, aggregated statistics, and optionally recent events.
+
+#### `ProfilerExport`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| config | `object` | Snapshot of the profiler configuration at export time |
+| config.enabled | `boolean` | Whether profiling was enabled |
+| config.mode | `"summary" \| "verbose"` | Profiling mode |
+| config.includeServerBreakdown | `boolean` | Whether server breakdown was enabled |
+| config.operationFilters | `string[]` | Active operation filters |
+| config.maxRecentEvents | `number` | Max recent events setting |
+| aggregates | `Record<string,` [`AggregatedStats`](#aggregatedstats)`>` | Aggregated statistics keyed by operation name |
+| recentEvents | [`ProfilingEvent[]`](#profilingevent) | Recent profiling events (only when `includeRecentEvents: true`) |
+| exportedAt | `number` | Monotonic timestamp of the export |
+
+#### `AggregatedStats`
+
+Per-operation aggregated statistics:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| count | `number` | Number of recorded events |
+| min | `number` | Minimum duration in milliseconds |
+| max | `number` | Maximum duration in milliseconds |
+| avg | `number` | Average duration in milliseconds |
+| sum | `number` | Total accumulated duration in milliseconds |
+| last | `number` | Most recent duration in milliseconds |
+
+#### `ProfilingEvent`
+
+Individual profiling event recorded during an SDK operation:
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| ts | `number` | ✓ | Monotonic timestamp in milliseconds |
+| op | `string` | ✓ | Operation name (e.g., `"completion"`, `"loadModel"`) |
+| kind | [`ProfilingEventKind`](#profilingeventkind) | ✓ | Event category |
+| profileId | `string` | ✗ | Unique identifier for the profiling session |
+| phase | `string` | ✗ | Sub-phase within the operation (e.g., `"rpc.send"`, `"handler.run"`) |
+| ms | `number` | ✗ | Duration in milliseconds |
+| count | `number` | ✗ | Count metric (e.g., tokens, chunks) |
+| bytes | `number` | ✗ | Byte count metric |
+| gauges | `Record<string, number>` | ✗ | Numeric gauges (e.g., throughput, token counters) |
+| tags | `Record<string, string>` | ✗ | String tags (e.g., `handlerType`, `sourceType`, `modelId`) |
+
+#### `ProfilingEventKind`
+
+`"rpc" | "handler" | "download" | "load" | "delegation"`
+
+### `exportTable()`
+
+```ts
+function exportTable(): string
+```
+
+Exports aggregated stats as a formatted ASCII table suitable for console output.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+`string` — formatted ASCII table of all aggregated profiling data.
+
+### `exportSummary()`
+
+```ts
+function exportSummary(): string
+```
+
+Exports a human-readable summary of all aggregated profiling data.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+`string` — human-readable summary string.
+
+### `onRecord()`
+
+```ts
+function onRecord(callback: (event: ProfilingEvent) => void): () => void
+```
+
+Registers a listener that is called for every profiling event.
+
+**Parameters**
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| callback | `(event:` [`ProfilingEvent`](#profilingevent)`) => void` | ✓ | Function called with each profiling event |
+
+**Returns**
+
+`() => void` — an unsubscribe function. Call it to stop receiving events.
+
+### `getConfig()`
+
+```ts
+function getConfig(): ResolvedProfilerConfig
+```
+
+Returns the current effective profiler configuration.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+[`ResolvedProfilerConfig`](#resolvedprofilerconfig) — the current effective profiler configuration.
+
+#### `ResolvedProfilerConfig`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| enabled | `boolean` | Whether profiling is currently enabled |
+| mode | `"summary" \| "verbose"` | Active profiling mode |
+| includeServerBreakdown | `boolean` | Whether server breakdown is included |
+| operationFilters | `string[]` | Active operation filters |
+| maxRecentEvents | `number` | Maximum number of recent events retained (default 1000) |
+
+### `getAggregates()`
+
+```ts
+function getAggregates(): Record<string, AggregatedStats>
+```
+
+Returns all aggregated stats keyed by operation name.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+[`Record<string, AggregatedStats>`](#aggregatedstats) — all aggregated stats keyed by operation name.
+
+### `clear()`
+
+```ts
+function clear(): void
+```
+
+Clears all aggregated data and recent events. Does not disable profiling.
+
+**Parameters**
+
+No parameters.
+
+**Returns**
+
+`void`
+
+## Example
+
+```typescript
+import { profiler, completion, loadModel } from "@qvac/sdk";
+
+profiler.enable({ mode: "verbose", includeServerBreakdown: true });
+
+const modelId = await loadModel({
+  modelSrc: "/path/to/model.gguf",
+  modelType: "llm",
+});
+
+const result = completion({
+  modelId,
+  history: [{ role: "user", content: "Hello!" }],
+});
+
+await result.text;
+
+console.log(profiler.exportTable());
+
+const json = profiler.exportJSON({ includeRecentEvents: true });
+console.log("Aggregates:", json.aggregates);
+
+profiler.clear();
+profiler.disable();
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragChunk.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragChunk.mdx
new file mode 100644
index 0000000000..1208272219
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragChunk.mdx
@@ -0,0 +1,57 @@
+---
+title: "ragChunk( )"
+titleStyle: code
+description: Chunks documents into smaller pieces for embedding.
+---
+
+```ts
+function ragChunk(params: RagChunkParams, options?: RPCOptions): Promise<RagDoc[]>;
+```
+
+Part of the segregated flow: `ragChunk()` → [`embed()`](../embed) → [`ragSaveEmbeddings()`](../ragSaveEmbeddings)
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params | `object` | ✓ | The chunking parameters |
+| params.documents | `string \| string[]` | ✓ | Documents to chunk |
+| params.chunkOpts | [`ChunkOptions`](#chunkoptions) | ✗ | Chunking options |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+### `ChunkOptions`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| chunkSize | `number` | ✗ | Maximum chunk size |
+| chunkOverlap | `number` | ✗ | Overlap between chunks |
+| chunkStrategy | `"character" \| "paragraph"` | ✗ | Chunking strategy |
+| splitStrategy | `"character" \| "word" \| "token" \| "sentence" \| "line"` | ✗ | Text splitting strategy |
+
+## Returns
+
+`Promise<RagDoc[]>` — Array of chunk results.
+
+| Field | Type | Description |
+| --- | --- | --- |
+| id | `string` | Chunk identifier |
+| content | `string` | Chunk text content |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_CHUNK_FAILED` | The chunking operation fails |
+
+## Example
+
+```typescript
+const chunks = await ragChunk({
+  documents: ["Long document text here..."],
+  chunkOpts: {
+    chunkSize: 256,
+    chunkOverlap: 50,
+    chunkStrategy: "paragraph",
+  },
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragCloseWorkspace.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragCloseWorkspace.mdx
new file mode 100644
index 0000000000..ba4225b0f5
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragCloseWorkspace.mdx
@@ -0,0 +1,39 @@
+---
+title: "ragCloseWorkspace( )"
+titleStyle: code
+description: Closes a RAG workspace, releasing in-memory resources (Corestore, HyperDB adapter, RAG instance).
+---
+
+```ts
+function ragCloseWorkspace(params?: RagCloseWorkspaceParams, options?: RPCOptions): Promise<void>;
+```
+
+Releases Corestore, HyperDB adapter, and RAG instance. Workspace data remains on disk unless `deleteOnClose` is set.
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.workspace | `string` | ✗ | `"default"` | Workspace to close |
+| params.deleteOnClose | `boolean` | ✗ | `false` | If true, deletes workspace data from disk after closing |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+`Promise<void>` — Resolves when the workspace is closed.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_WORKSPACE_CLOSE_FAILED` | The close operation fails |
+
+## Example
+
+```typescript
+// Close a workspace
+await ragCloseWorkspace({ workspace: "my-docs" });
+
+// Close and delete in one call
+await ragCloseWorkspace({ workspace: "my-docs", deleteOnClose: true });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteEmbeddings.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteEmbeddings.mdx
new file mode 100644
index 0000000000..ffb86d7b2f
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteEmbeddings.mdx
@@ -0,0 +1,37 @@
+---
+title: "ragDeleteEmbeddings( )"
+titleStyle: code
+description: Deletes document embeddings from the RAG vector database.
+---
+
+```ts
+function ragDeleteEmbeddings(params: RagDeleteEmbeddingsParams, options?: RPCOptions): Promise<void>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.ids | `string[]` | ✓ | — | Array of document IDs to delete |
+| params.modelId | `string` | ✗ | — | Embedding model ID (required if no cached RAG instance) |
+| params.workspace | `string` | ✗ | `"default"` | Workspace to delete from |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+`Promise<void>` — Resolves when the embeddings are deleted.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_DELETE_FAILED` | The delete operation fails or the workspace doesn't exist |
+
+## Example
+
+```typescript
+await ragDeleteEmbeddings({
+  ids: ["doc-1", "doc-2"],
+  workspace: "my-docs",
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteWorkspace.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteWorkspace.mdx
new file mode 100644
index 0000000000..9f4dd8b7f7
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragDeleteWorkspace.mdx
@@ -0,0 +1,34 @@
+---
+title: "ragDeleteWorkspace( )"
+titleStyle: code
+description: Deletes a RAG workspace and all its data.
+---
+
+```ts
+function ragDeleteWorkspace(params: RagDeleteWorkspaceParams, options?: RPCOptions): Promise<void>;
+```
+
+The workspace must not be currently loaded or in use.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params.workspace | `string` | ✓ | Name of the workspace to delete |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+## Returns
+
+`Promise<void>` — Resolves when the workspace is deleted.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_DELETE_FAILED` | The workspace doesn't exist or is currently loaded |
+
+## Example
+
+```typescript
+await ragDeleteWorkspace({ workspace: "my-docs" });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragIngest.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragIngest.mdx
new file mode 100644
index 0000000000..5172542f2b
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragIngest.mdx
@@ -0,0 +1,51 @@
+---
+title: "ragIngest( )"
+titleStyle: code
+description: Ingests documents into the RAG vector database.
+---
+
+```ts
+function ragIngest(params: RagIngestParams, options?: RPCOptions): Promise<{ processed: RagSaveEmbeddingsResult[]; droppedIndices: number[] }>;
+```
+
+Full pipeline: chunk → embed → save. Implicitly opens (or creates) the workspace.
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✓ | — | The embedding model identifier |
+| params.documents | `string \| string[]` | ✓ | — | Documents to ingest |
+| params.chunk | `boolean` | ✗ | `true` | Whether to chunk documents before embedding |
+| params.chunkOpts | [`ChunkOptions`](../ragChunk#chunkoptions) | ✗ | — | Chunking options |
+| params.workspace | `string` | ✗ | `"default"` | Workspace for isolated storage. Created if it doesn't exist. |
+| params.onProgress | `(stage, current, total) => void` | ✗ | — | Progress callback |
+| params.progressInterval | `number` | ✗ | — | Minimum interval between progress updates in ms |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+| Field | Type | Description |
+| --- | --- | --- |
+| processed | `RagSaveEmbeddingsResult[]` | Array of `{ status: "fulfilled" \| "rejected", id?, error? }` |
+| droppedIndices | `number[]` | Indices of documents that were dropped |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_SAVE_FAILED` | The ingestion operation fails |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Streaming ends unexpectedly (when using `onProgress`) |
+
+## Example
+
+```typescript
+const result = await ragIngest({
+  modelId,
+  documents: ["Document 1", "Document 2"],
+  workspace: "my-docs",
+  onProgress: (stage, current, total) => {
+    console.log(`[${stage}] ${current}/${total}`);
+  },
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragListWorkspaces.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragListWorkspaces.mdx
new file mode 100644
index 0000000000..3b01f7b247
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragListWorkspaces.mdx
@@ -0,0 +1,37 @@
+---
+title: "ragListWorkspaces( )"
+titleStyle: code
+description: Lists all RAG workspaces with their open status.
+---
+
+```ts
+function ragListWorkspaces(options?: RPCOptions): Promise<RagWorkspaceInfo[]>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+## Returns
+
+`Promise<RagWorkspaceInfo[]>` — Array of workspace info.
+
+| Field | Type | Description |
+| --- | --- | --- |
+| name | `string` | Workspace name |
+| open | `boolean` | Whether the workspace is currently loaded in memory |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_LIST_WORKSPACES_FAILED` | The operation fails |
+
+## Example
+
+```typescript
+const workspaces = await ragListWorkspaces();
+// [{ name: "default", open: true }, { name: "my-docs", open: false }]
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragReindex.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragReindex.mdx
new file mode 100644
index 0000000000..670eab2ec1
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragReindex.mdx
@@ -0,0 +1,44 @@
+---
+title: "ragReindex( )"
+titleStyle: code
+description: Reindexes the RAG database to optimize search performance.
+---
+
+```ts
+function ragReindex(params: RagReindexParams, options?: RPCOptions): Promise<RagReindexResult>;
+```
+
+For HyperDB, rebalances centroids using k-means clustering. Requires a minimum number of documents (16 by default).
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✗ | — | Embedding model ID (required if no cached RAG instance exists) |
+| params.workspace | `string` | ✗ | `"default"` | Workspace to reindex. Must already exist. |
+| params.onProgress | `(stage, current, total) => void` | ✗ | — | Progress callback |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+| Field | Type | Description |
+| --- | --- | --- |
+| reindexed | `boolean` | Whether reindexing was performed |
+| details | `Record<string, unknown>` | Additional details (e.g., reason if skipped) — optional |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_SAVE_FAILED` | The reindex operation fails or workspace doesn't exist |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Streaming ends unexpectedly (when using `onProgress`) |
+
+## Example
+
+```typescript
+const result = await ragReindex({ workspace: "my-docs" });
+
+if (!result.reindexed) {
+  console.log("Reindex skipped:", result.details?.reason);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragSaveEmbeddings.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragSaveEmbeddings.mdx
new file mode 100644
index 0000000000..6368f231b9
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragSaveEmbeddings.mdx
@@ -0,0 +1,59 @@
+---
+title: "ragSaveEmbeddings( )"
+titleStyle: code
+description: Saves pre-embedded documents to the RAG vector database.
+---
+
+```ts
+function ragSaveEmbeddings(params: RagSaveEmbeddingsParams, options?: RPCOptions): Promise<RagSaveEmbeddingsResult[]>;
+```
+
+Part of the segregated flow: [`ragChunk()`](../ragChunk) → [`embed()`](../embed) → `ragSaveEmbeddings()`. Implicitly opens (or creates) the workspace.
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.documents | [`RagEmbeddedDoc[]`](#ragembeddeddoc) | ✓ | — | Pre-embedded documents |
+| params.modelId | `string` | ✗ | — | Embedding model ID (required if no cached RAG instance exists) |
+| params.workspace | `string` | ✗ | `"default"` | Workspace for isolated storage |
+| params.onProgress | `(stage, current, total) => void` | ✗ | — | Progress callback |
+| params.progressInterval | `number` | ✗ | — | Minimum interval between progress updates in ms |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+### `RagEmbeddedDoc`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| id | `string` | ✓ | Document identifier |
+| content | `string` | ✓ | Document text content |
+| embedding | `number[]` | ✓ | Pre-computed embedding vector |
+| embeddingModelId | `string` | ✓ | Model used to generate the embedding |
+| metadata | `Record<string, unknown>` | ✗ | Optional metadata |
+
+## Returns
+
+`Promise<RagSaveEmbeddingsResult[]>` — Array of `{ status: "fulfilled" | "rejected", id?, error? }`.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_SAVE_FAILED` | The save operation fails |
+| `STREAM_ENDED_WITHOUT_RESPONSE` | Streaming ends unexpectedly (when using `onProgress`) |
+
+## Example
+
+```typescript
+const chunks = await ragChunk({ documents: ["text1", "text2"] });
+const embeddings = await embed({ modelId, text: chunks.map(c => c.content) });
+const embeddedDocs = chunks.map((chunk, i) => ({
+  ...chunk,
+  embedding: embeddings[i],
+  embeddingModelId: modelId,
+}));
+const result = await ragSaveEmbeddings({
+  documents: embeddedDocs,
+  workspace: "my-workspace",
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/ragSearch.mdx b/docs/website/content/docs/v0.9.0/sdk/api/ragSearch.mdx
new file mode 100644
index 0000000000..490bdeb348
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/ragSearch.mdx
@@ -0,0 +1,47 @@
+---
+title: "ragSearch( )"
+titleStyle: code
+description: Searches for similar documents in the RAG vector database.
+---
+
+```ts
+function ragSearch(params: RagSearchParams, options?: RPCOptions): Promise<RagSearchResult[]>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✓ | — | The embedding model identifier |
+| params.query | `string` | ✓ | — | The search query text |
+| params.topK | `number` | ✗ | `5` | Number of top results to retrieve |
+| params.n | `number` | ✗ | `3` | Number of centroids for IVF index search |
+| params.workspace | `string` | ✗ | `"default"` | Workspace to search in |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+`Promise<RagSearchResult[]>` — Array of search results. Empty array if workspace doesn't exist.
+
+| Field | Type | Description |
+| --- | --- | --- |
+| id | `string` | Document identifier |
+| content | `string` | Document text content |
+| score | `number` | Similarity score |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `RAG_SEARCH_FAILED` | The search operation fails |
+
+## Example
+
+```typescript
+const results = await ragSearch({
+  modelId,
+  query: "AI and machine learning",
+  topK: 5,
+  workspace: "my-docs",
+});
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/resume.mdx b/docs/website/content/docs/v0.9.0/sdk/api/resume.mdx
new file mode 100644
index 0000000000..7873a76758
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/resume.mdx
@@ -0,0 +1,34 @@
+---
+title: "resume( )"
+titleStyle: code
+description: Resumes all suspended Hyperswarm and Corestore resources.
+---
+
+```ts
+function resume(): Promise<void>;
+```
+
+Resumes all suspended Hyperswarm and Corestore resources. Idempotent — calling while already active is a no-op. Also serves as the recovery path after a partial suspend failure.
+
+Typically used in mobile apps when the application returns to the foreground, paired with [`suspend()`](../suspend) when it moves to the background.
+
+## Parameters
+
+None.
+
+## Returns
+
+`Promise<void>`
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"resume"` |
+| `LIFECYCLE_RESUME_FAILED` | One or more resources failed to resume |
+
+## Example
+
+```typescript
+await resume();
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/shared-types.mdx b/docs/website/content/docs/v0.9.0/sdk/api/shared-types.mdx
new file mode 100644
index 0000000000..217971784d
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/shared-types.mdx
@@ -0,0 +1,22 @@
+---
+title: Shared types
+description: Shared type definitions used across SDK functions.
+---
+
+## `RPCOptions`
+
+Many functions accept an optional `options` parameter of type `RPCOptions` for transport-level configuration:
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| timeout | `number` | ✗ | — | Request timeout in milliseconds (min 100) |
+| forceNewConnection | `boolean` | ✗ | `false` | Force a new RPC connection instead of reusing an existing one |
+| profiling | [`PerCallProfiling`](#percallprofiling) | ✗ | — | Per-call profiling configuration |
+
+### `PerCallProfiling`
+
+| Field | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| enabled | `boolean` | ✗ | Enable profiling for this call |
+| includeServerBreakdown | `boolean` | ✗ | Include server-side timing breakdown in profiling data |
+| mode | `"summary" \| "verbose"` | ✗ | Profiling detail level |
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/startQVACProvider.mdx b/docs/website/content/docs/v0.9.0/sdk/api/startQVACProvider.mdx
new file mode 100644
index 0000000000..249ef06150
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/startQVACProvider.mdx
@@ -0,0 +1,49 @@
+---
+title: "startQVACProvider( )"
+titleStyle: code
+description: Starts a provider service that offers QVAC capabilities to remote peers.
+---
+
+```ts
+function startQVACProvider(params: ProvideParams): Promise<object>;
+```
+
+The provider's keypair can be controlled via the seed option or the `QVAC_HYPERSWARM_SEED` environment variable.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params.topic | `string` | ✓ | Topic hex string for peer discovery |
+| params.firewall | [`FirewallConfig`](#firewallconfig) | ✗ | Optional firewall configuration |
+
+### `FirewallConfig`
+
+| Field | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| mode | `"allow" \| "deny"` | ✗ | `"allow"` | Firewall mode |
+| publicKeys | `string[]` | ✗ | `[]` | Public keys to allow or deny |
+
+## Returns
+
+`Promise<object>` — The provide response containing `success`, `publicKey`, and optional `error`.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"provide"` |
+| `PROVIDER_START_FAILED` | The server reports provider start failure |
+
+## Example
+
+```typescript
+const response = await startQVACProvider({
+  topic: "a1b2c3d4...",
+  firewall: {
+    mode: "allow",
+    publicKeys: ["peer-public-key-hex"],
+  },
+});
+console.log("Provider public key:", response.publicKey);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/stopQVACProvider.mdx b/docs/website/content/docs/v0.9.0/sdk/api/stopQVACProvider.mdx
new file mode 100644
index 0000000000..efc9188ce6
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/stopQVACProvider.mdx
@@ -0,0 +1,32 @@
+---
+title: "stopQVACProvider( )"
+titleStyle: code
+description: Stops a running provider service and leaves the specified topic.
+---
+
+```ts
+function stopQVACProvider(params: StopProvideParams): Promise<object>;
+```
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params.topic | `string` | ✓ | Topic hex string to leave |
+
+## Returns
+
+`Promise<object>` — The stop provide response containing `success` and optional `error`.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"stopProvide"` |
+| `PROVIDER_STOP_FAILED` | The server reports provider stop failure |
+
+## Example
+
+```typescript
+await stopQVACProvider({ topic: "a1b2c3d4..." });
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/suspend.mdx b/docs/website/content/docs/v0.9.0/sdk/api/suspend.mdx
new file mode 100644
index 0000000000..43872419cf
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/suspend.mdx
@@ -0,0 +1,34 @@
+---
+title: "suspend( )"
+titleStyle: code
+description: Suspends all active Hyperswarm and Corestore resources.
+---
+
+```ts
+function suspend(): Promise<void>;
+```
+
+Suspends all active Hyperswarm and Corestore resources. Idempotent — calling while already suspended is a no-op.
+
+Typically used in mobile apps when the application moves to the background, paired with [`resume()`](../resume) when it returns to the foreground.
+
+## Parameters
+
+None.
+
+## Returns
+
+`Promise<void>`
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"suspend"` |
+| `LIFECYCLE_SUSPEND_FAILED` | One or more resources failed to suspend (partial failure) |
+
+## Example
+
+```typescript
+await suspend();
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/textToSpeech.mdx b/docs/website/content/docs/v0.9.0/sdk/api/textToSpeech.mdx
new file mode 100644
index 0000000000..cfb93b7cdb
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/textToSpeech.mdx
@@ -0,0 +1,47 @@
+---
+title: "textToSpeech( )"
+titleStyle: code
+description: Converts text to speech audio.
+---
+
+```ts
+function textToSpeech(params: TtsClientParams, options?: RPCOptions): {
+  bufferStream: AsyncGenerator<number>;
+  buffer: Promise<number[]>;
+  done: Promise<boolean>;
+};
+```
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✓ | — | The identifier of the loaded TTS model |
+| params.text | `string` | ✓ | — | The text to convert to speech (non-empty) |
+| params.inputType | `string` | ✗ | `"text"` | Input type |
+| params.stream | `boolean` | ✗ | `true` | Whether to stream audio samples or return all at once |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| bufferStream | `AsyncGenerator<number>` | Stream of audio samples (active when `stream: true`) |
+| buffer | `Promise<number[]>` | Complete audio buffer (populated when `stream: false`) |
+| done | `Promise<boolean>` | Resolves to `true` when generation completes |
+
+## Example
+
+```typescript
+// Streaming mode
+const { bufferStream } = textToSpeech({ modelId, text: "Hello world" });
+for await (const sample of bufferStream) {
+  // process audio sample
+}
+
+// Non-streaming mode
+const { buffer } = textToSpeech({ modelId, text: "Hello world", stream: false });
+const audioData = await buffer;
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/transcribe.mdx b/docs/website/content/docs/v0.9.0/sdk/api/transcribe.mdx
new file mode 100644
index 0000000000..4cc46b12e0
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/transcribe.mdx
@@ -0,0 +1,53 @@
+---
+title: "transcribe( )"
+titleStyle: code
+description: Provides a simple interface for transcribing audio by collecting all streaming results into a single string response.
+---
+
+```ts
+function transcribe(params: TranscribeClientParams, options?: RPCOptions): Promise<string>;
+```
+
+Collects all streaming results from [`transcribeStream()`](../transcribeStream) into a single string.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params.modelId | `string` | ✓ | The identifier of the transcription model |
+| params.audioChunk | `string \| Buffer` | ✓ | Audio input as file path (string) or audio buffer |
+| params.prompt | `string` | ✗ | Optional initial prompt to guide the transcription |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+## Returns
+
+`Promise<string>` — The complete transcribed text.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `TRANSCRIPTION_FAILED` | Transcription fails |
+
+## Example
+
+```typescript
+const text = await transcribe({
+  modelId: "whisper-model",
+  audioChunk: "/path/to/audio.wav",
+});
+console.log(text);
+```
+
+## `SUPPORTED_AUDIO_FORMATS`
+
+Use this exported constant to check which audio formats are accepted before passing a file to `transcribe()`. Avoids runtime errors from unsupported formats.
+
+```typescript
+import { SUPPORTED_AUDIO_FORMATS } from "@qvac/sdk";
+
+const ext = filePath.split(".").pop();
+if (!SUPPORTED_AUDIO_FORMATS.includes(ext)) {
+  console.error(`Unsupported format: ${ext}`);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/transcribeStream.mdx b/docs/website/content/docs/v0.9.0/sdk/api/transcribeStream.mdx
new file mode 100644
index 0000000000..1d4178b3ad
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/transcribeStream.mdx
@@ -0,0 +1,56 @@
+---
+title: "transcribeStream( )"
+titleStyle: code
+description: Streams audio transcription results in real-time, yielding text chunks as they become available from the model.
+---
+
+```ts
+function transcribeStream(params: TranscribeClientParams, options?: RPCOptions): AsyncGenerator<string>;
+```
+
+Yields text chunks as they become available from the model.
+
+## Parameters
+
+| Name | Type | Required? | Description |
+| --- | --- | :---: | --- |
+| params.modelId | `string` | ✓ | The identifier of the transcription model |
+| params.audioChunk | `string \| Buffer` | ✓ | Audio input as file path (string) or audio buffer |
+| params.prompt | `string` | ✗ | Optional initial prompt to guide the transcription |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | Optional RPC transport options |
+
+## Returns
+
+`AsyncGenerator<string>` — Yields text chunks as they are transcribed.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `TRANSCRIPTION_FAILED` | Transcription fails |
+
+## Example
+
+```typescript
+const stream = transcribeStream({
+  modelId: "whisper-model",
+  audioChunk: "/path/to/audio.wav",
+});
+
+for await (const textChunk of stream) {
+  process.stdout.write(textChunk);
+}
+```
+
+## `SUPPORTED_AUDIO_FORMATS`
+
+Use this exported constant to check which audio formats are accepted before passing a file to `transcribeStream()`. Avoids runtime errors from unsupported formats.
+
+```typescript
+import { SUPPORTED_AUDIO_FORMATS } from "@qvac/sdk";
+
+const ext = filePath.split(".").pop();
+if (!SUPPORTED_AUDIO_FORMATS.includes(ext)) {
+  console.error(`Unsupported format: ${ext}`);
+}
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/translate.mdx b/docs/website/content/docs/v0.9.0/sdk/api/translate.mdx
new file mode 100644
index 0000000000..97c61ba238
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/translate.mdx
@@ -0,0 +1,80 @@
+---
+title: "translate( )"
+titleStyle: code
+description: Translates text from one language to another using a specified translation model.
+---
+
+```ts
+function translate(params: TranslateClientParams): {
+  tokenStream: AsyncGenerator<string>;
+  text: Promise<string>;
+  stats: Promise<TranslationStats | undefined>;
+};
+```
+
+Supports both NMT (Neural Machine Translation) and LLM models.
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✓ | — | The identifier of the translation model |
+| params.text | `string \| string[]` | ✓ | — | The input text(s) to translate. NMT supports array; LLM only string. |
+| params.modelType | `"nmt" \| "llm"` | ✓ | — | The type of translation model |
+| params.stream | `boolean` | ✗ | `true` | Whether to stream tokens or return complete response |
+| params.from | `string` | ✗ | auto-detect | Source language code (LLM only) |
+| params.to | `string` | ✓ (LLM) | — | Target language code (LLM only) |
+| params.context | `string` | ✗ | — | Additional context for translation (LLM only) |
+| options | [`RPCOptions`](../index#rpcoptions) | ✗ | — | Optional RPC transport options |
+
+## Returns
+
+`object` — Object with the following fields:
+
+| Field | Type | Description |
+| --- | --- | --- |
+| tokenStream | `AsyncGenerator<string>` | Stream of translated tokens |
+| text | `Promise<string>` | Complete translated text |
+| stats | `Promise<`[`TranslationStats`](#translationstats) `\| undefined>` | Translation statistics |
+
+### `TranslationStats`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| processedTokens | `number` | Number of tokens processed |
+| processingTime | `number` | Processing time in milliseconds |
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `TRANSLATION_FAILED` | Translation fails |
+
+## Example
+
+```typescript
+// Streaming mode (default)
+const result = translate({
+  modelId: "llm-model",
+  text: "Hello world",
+  from: "en",
+  to: "es",
+  modelType: "llm",
+});
+
+for await (const token of result.tokenStream) {
+  process.stdout.write(token);
+}
+
+// Non-streaming mode
+const response = translate({
+  modelId: "llm-model",
+  text: "Hello world",
+  from: "en",
+  to: "es",
+  modelType: "llm",
+  stream: false,
+});
+
+console.log(await response.text);
+```
diff --git a/docs/website/content/docs/v0.9.0/sdk/api/unloadModel.mdx b/docs/website/content/docs/v0.9.0/sdk/api/unloadModel.mdx
new file mode 100644
index 0000000000..4c7dd458b4
--- /dev/null
+++ b/docs/website/content/docs/v0.9.0/sdk/api/unloadModel.mdx
@@ -0,0 +1,38 @@
+---
+title: "unloadModel( )"
+titleStyle: code
+description: Unloads a previously loaded model from the server.
+---
+
+```ts
+function unloadModel(params: UnloadModelParams): Promise<void>;
+```
+
+When the last model is unloaded and no providers are active, the RPC connection is automatically closed, allowing the process to exit naturally.
+
+## Parameters
+
+| Name | Type | Required? | Default | Description |
+| --- | --- | :---: | --- | --- |
+| params.modelId | `string` | ✓ | — | The unique identifier of the model to unload |
+| params.clearStorage | `boolean` | ✗ | `false` | Whether to clear the storage for the model |
+
+## Returns
+
+`Promise<void>` — Resolves when the model is unloaded.
+
+## Throws
+
+| Error | When |
+| --- | --- |
+| `INVALID_RESPONSE_TYPE` | Response type does not match expected `"unloadModel"` |
+| `MODEL_UNLOAD_FAILED` | The server reports unload failure |
+
+## Example
+
+```typescript
+await unloadModel({ modelId: "model-123" });
+
+// Unload and clear cached files
+await unloadModel({ modelId: "model-123", clearStorage: true });
+```
diff --git a/docs/website/public/_redirects b/docs/website/public/_redirects
index 6d7cefe8ff..ab30997e50 100644
--- a/docs/website/public/_redirects
+++ b/docs/website/public/_redirects
@@ -9,3 +9,7 @@
 /v0.8.0/sdk/api/:page/ /v0.8.0/sdk/api/:page/index.html 200
 /v0.8.0/sdk/api/:page /v0.8.0/sdk/api/:page/index.html 200
 /v0.8.0/sdk/api/ /v0.8.0/sdk/api/index.html 200
+
+/v0.9.0/sdk/api/:page/ /v0.9.0/sdk/api/:page/index.html 200
+/v0.9.0/sdk/api/:page /v0.9.0/sdk/api/:page/index.html 200
+/v0.9.0/sdk/api/ /v0.9.0/sdk/api/index.html 200
diff --git a/docs/website/src/lib/trees/index.ts b/docs/website/src/lib/trees/index.ts
index 0360582056..8f601ed3b7 100644
--- a/docs/website/src/lib/trees/index.ts
+++ b/docs/website/src/lib/trees/index.ts
@@ -2,6 +2,7 @@ import type { Node } from 'fumadocs-core/page-tree';
 import { tree as latestTree } from './latest';
 import { tree as devTree } from './dev';
 import { tree as v070Tree } from './v0.7.0';
+import { tree as v080Tree } from './v0.8.0';
 
 /**
  * All sidebar trees keyed by version.
@@ -11,6 +12,7 @@ import { tree as v070Tree } from './v0.7.0';
 export function getAllTrees(): Record<string, Node[]> {
   return {
     'dev': devTree,
+    'v0.8.0': v080Tree,
     'v0.7.0': v070Tree,
     'latest': latestTree,
   };
diff --git a/docs/website/src/lib/trees/v0.8.0.ts b/docs/website/src/lib/trees/v0.8.0.ts
new file mode 100644
index 0000000000..f2d6745d52
--- /dev/null
+++ b/docs/website/src/lib/trees/v0.8.0.ts
@@ -0,0 +1,13 @@
+import type { Node } from 'fumadocs-core/page-tree';
+import { tree as latestTree, findFolderChildren } from './latest';
+import { source } from '@/lib/source';
+
+export const tree: Node[] = latestTree.map(node =>
+  node.type === 'folder' && node.name === 'JS API'
+    ? {
+        ...node,
+        index: node.index ? { ...node.index, url: '/v0.8.0/sdk/api' } : node.index,
+        children: findFolderChildren(source.pageTree.children, '/v0.8.0/sdk/api'),
+      }
+    : node
+);
diff --git a/docs/website/src/lib/versions.ts b/docs/website/src/lib/versions.ts
index 395f8ec49a..df8d8b19a3 100644
--- a/docs/website/src/lib/versions.ts
+++ b/docs/website/src/lib/versions.ts
@@ -7,11 +7,12 @@ export interface Version {
 
 export const VERSIONS: Version[] = [
   { label: 'dev', value: 'dev', isDev: true },
-  { label: 'latest (v0.8.0)', value: 'v0.8.0', isLatest: true },
+  { label: 'latest (v0.9.0)', value: 'v0.9.0', isLatest: true },
+  { label: 'v0.8.0', value: 'v0.8.0' },
   { label: 'v0.7.0', value: 'v0.7.0' },
 ];
 
-export const LATEST_VERSION = 'v0.8.0';
+export const LATEST_VERSION = 'v0.9.0';
 
 const VERSION_PREFIX_RE = /^\/(v\d+\.\d+\.\d+|dev)(\/|$)/;