From 0213970244e8e58ae49c6512aea61fdded84188f Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Mon, 27 Apr 2026 12:23:15 -0300
Subject: [PATCH 1/8] doc: content update - sdk - completion

---
 .../sdk/examples/ai-tasks/completion.mdx      | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/completion.mdx b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/completion.mdx
index f76cc23150..b01277ce99 100644
--- a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/completion.mdx
+++ b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/completion.mdx
@@ -33,21 +33,24 @@ You can load any [`llama.cpp`](https://github.com/ggml-org/llama.cpp)-compatible
 
 ## Features
 
-* Tool calls: let the model emit structured tool calls and stream tool-call events alongside tokens.
+* Event stream: `completion()` exposes a single ordered `events` async iterable plus an aggregated `final` promise. Events are discriminated by `type` — `contentDelta`, `thinkingDelta`, `toolCall`, `toolError`, `completionStats`, `completionDone`, `rawDelta` — see [`completion()`](/sdk/api/completion) for the full schema.
+* Thinking content: models that emit `<think>` blocks surface them as dedicated `thinkingDelta` events (enable with `captureThinking: true`), so consumers don't have to parse tags from raw text.
+* Tool calls: the model emits structured tool calls as `toolCall` events ordered alongside content and thinking in the same stream.
 * MCP: plug MCP servers into `completion()` so the model can use external tools (e.g., web search) via the same tool-call mechanism.
+* Raw output: with `emitRawDeltas: true`, every raw model token is also emitted as a `rawDelta` event in parallel to the structured events — useful for debugging or full-fidelity logging.
 * KV cache: cache and reuse the model’s key/value attention state to speed up follow-up turns in long conversations.
 
 ## Examples
 
 ### Usage
 
-The following script shows a basic example of completion:
+The canonical way to consume `completion()` is the `events` async iterable plus the aggregated `final` promise. The following script shows how to handle each event type and read the aggregated result:
 
 <Tabs>
 <Tab value="js" label="JavaScript" default>
 <WrapCode>
 
-```js file=<rootDir>/packages/sdk/dist/examples/llamacpp-p2p.js title="completion.js" lineNumbers
+```js file=<rootDir>/packages/sdk/dist/examples/completion-events.js title="completion-events.js" lineNumbers
 ```
 </WrapCode>
 </Tab>
@@ -55,15 +58,19 @@ The following script shows a basic example of completion:
 <Tab value="ts" label="TypeScript">
 <WrapCode>
 
-```ts file=<rootDir>/packages/sdk/examples/llamacpp-p2p.ts title="completion.ts" lineNumbers
+```ts file=<rootDir>/packages/sdk/examples/completion-events.ts title="completion-events.ts" lineNumbers
 ```
 </WrapCode>
 </Tab>
 </Tabs>
 
+<Callout type="info">
+The examples below (`Tool call`, `MCP`, `KV cache`) still consume `result.tokenStream` and `result.toolCallStream`, which are convenience wrappers around the canonical `events` / `final` stream shown above. Both APIs are supported; new code should prefer `events` / `final`.
+</Callout>
+
 ### Tool call
 
-The following script shows how to provide tool definitions to `completion()`, stream `toolCallStream` events, and read the parsed tool calls:
+The following script shows how to provide tool definitions to `completion()`, consume the streaming output, and read the parsed tool calls:
 
 <Tabs>
 <Tab value="js" label="JavaScript" default>
@@ -105,7 +112,7 @@ You create and manage the MCP client, connect it to one or more MCP servers, and
 </Tab>
 </Tabs>
 
-### KVcache
+### KV cache
 
 The following script enables `kvCache: true` to speed up follow-up turns, and then compares it with `kvCache: false` on the same history:
 
@@ -129,4 +136,4 @@ The following script enables `kvCache: true` to speed up follow-up turns, and th
 
 <Callout type="success">
 **Tip:** all examples throughout this documentation are self-contained and runnable. For instructions on how to run them, see [SDK quickstart](/sdk/getting-started/quickstart).
-</Callout>
\ No newline at end of file
+</Callout>

From 5f296c91509517d2e29ea3778f9f85a0db147660 Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Tue, 28 Apr 2026 22:18:24 -0300
Subject: [PATCH 2/8] doc: content new - SDK - runtime lifecycle

---
 .../examples/utilities/runtime-lifecycle.mdx  | 74 +++++++++++++++++++
 .../(latest)/sdk/getting-started/index.mdx    |  3 +-
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 docs/website/content/docs/(latest)/sdk/examples/utilities/runtime-lifecycle.mdx

diff --git a/docs/website/content/docs/(latest)/sdk/examples/utilities/runtime-lifecycle.mdx b/docs/website/content/docs/(latest)/sdk/examples/utilities/runtime-lifecycle.mdx
new file mode 100644
index 0000000000..590f576d31
--- /dev/null
+++ b/docs/website/content/docs/(latest)/sdk/examples/utilities/runtime-lifecycle.mdx
@@ -0,0 +1,74 @@
+---
+title: Runtime lifecycle
+description: Suspend and resume the SDK runtime (e.g., when the host app moves to background) and query lifecycle state.
+---
+
+## Overview
+
+While running, the SDK keeps live resources in the host process — Hyperswarm sockets, Corestore handles, RAG corestore, registered download streams, and a request gate above handler dispatch. When the host app moves to background (mobile suspend, desktop minimize, daemon hibernation), holding those resources open wastes battery and can break sockets that are torn down by the OS.
+
+`suspend()` pauses those resources and engages a lifecycle gate so non-lifecycle operations fail fast instead of hanging. `resume()` restores them when the app returns to foreground. `state()` is the source of truth for the current runtime state — useful for branching app logic without shadow-tracking suspend/resume locally.
+
+`suspend()`, `resume()`, and `state()` are themselves never blocked by the gate, and all three are idempotent.
+
+## Functions
+
+1. `suspend()` — call from the background handler
+2. `state()` — read `"active" | "suspending" | "suspended" | "resuming"`
+3. `resume()` — call from the foreground handler
+
+For how to use each function, see [SDK — API reference](/sdk/api/).
+
+## Lifecycle states
+
+`state()` returns one of:
+
+- `"active"` — all SDK operations are accepted normally
+- `"suspending"` — `suspend()` is in progress; non-lifecycle operations are already blocked
+- `"suspended"` — runtime is paused; only `suspend()`, `resume()`, and `state()` are accepted
+- `"resuming"` — `resume()` is in progress; non-lifecycle operations are still blocked
+
+A partial `resume()` failure leaves the runtime in `"suspended"` (not `"active"`), so the runtime never reports `"active"` while only partially restored, and callers can simply retry `resume()`.
+
+## Behavior while suspended
+
+<Callout type="info">
+While runtime state is not `"active"`, only `suspend()`, `resume()`, and `state()` are accepted. Any other request fails fast with `LIFECYCLE_OPERATION_BLOCKED` instead of hanging.
+</Callout>
+
+In-flight operations started **before** `suspend()` follow the matrix below:
+
+| Operation | During suspend | After resume |
+| --- | --- | --- |
+| P2P / Hyperdrive download | Stalls cleanly | Continues automatically |
+| HTTP download | Not paused — bytes keep flowing | n/a (already flowing) |
+| Local native inference (e.g. `completion()`) | Runs to completion | n/a |
+| Delegated reply RPC | Stalls | Auto-recovers (subject to delegate `timeout`) |
+| Delegated stream RPC | Severed; consumer iterator hangs silently | Not recovered — re-issue after `resume()` |
+| New operation (e.g. `completion()`, `loadModel()`) | Throws `LIFECYCLE_OPERATION_BLOCKED` | Accepted |
+
+## Example
+
+The following script loads a model, runs a completion, suspends the runtime, demonstrates that a new `completion()` is blocked while suspended, then resumes and runs another completion. `state()` is sampled at each step:
+
+<Tabs>
+<Tab value="js" label="JavaScript" default>
+<WrapCode>
+
+```js file=<rootDir>/packages/sdk/dist/examples/suspend-resume.js title="runtime-lifecycle.js" lineNumbers
+```
+</WrapCode>
+</Tab>
+
+<Tab value="ts" label="TypeScript">
+<WrapCode>
+
+```ts file=<rootDir>/packages/sdk/examples/suspend-resume.ts title="runtime-lifecycle.ts" lineNumbers
+```
+</WrapCode>
+</Tab>
+</Tabs>
+
+<Callout type="success">
+**Tip:** all examples throughout this documentation are self-contained and runnable. For instructions on how to run them, see [SDK quickstart](/sdk/getting-started/quickstart).
+</Callout>
diff --git a/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx b/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
index 3b9b8c5971..069a034c37 100644
--- a/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
+++ b/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
@@ -67,6 +67,7 @@ The JS SDK is cross-platform, type-safe, and pluggable, exposing all QVAC capabi
 * [**Logging:**](/sdk/examples/utilities/logging) visibility into what's happening  during loading, inference, and other operations.
 * [**Profiler:**](/sdk/examples/utilities/profiler) measure and export timing metrics across model loading, inference, and P2P delegation.
 * [**Download Lifecycle:**](/sdk/examples/utilities/download-lifecycle) pause and resume model downloads.
+* [**Runtime lifecycle:**](/sdk/examples/utilities/runtime-lifecycle) suspend and resume the SDK runtime (e.g., on app background/foreground) and query lifecycle state.
 * [**Sharded models:**](/sdk/examples/utilities/sharded-models) download a model that is sharded into multiple parts.
 
 ## Flow
@@ -130,4 +131,4 @@ For more on loading models, see [`loadModel()` at `@qvac/sdk` API reference](/sd
 ## Other resources
 
 - [SDK landing page](https://qvac.tether.io/dev/sdk/)
-- [Package at npm](https://www.npmjs.com/package/@qvac/sdk)
\ No newline at end of file
+- [Package at npm](https://www.npmjs.com/package/@qvac/sdk)

From ce146189cbfd3e77a977e6125844a3a1f78183ee Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Tue, 28 Apr 2026 22:26:51 -0300
Subject: [PATCH 3/8] doc: update sidebar - add new page - runtime lifecycle

---
 docs/website/src/lib/trees/latest.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/website/src/lib/trees/latest.ts b/docs/website/src/lib/trees/latest.ts
index 0865cd8eb3..5c7bef9961 100644
--- a/docs/website/src/lib/trees/latest.ts
+++ b/docs/website/src/lib/trees/latest.ts
@@ -144,6 +144,7 @@ export const tree: Node[] = [
           { name: 'Logging', url: '/sdk/examples/utilities/logging', type: 'page', icon: resolveIcon('Activity') },
           { name: 'Profiler', url: '/sdk/examples/utilities/profiler', type: 'page', icon: resolveIcon('Timer') },
           { name: 'Download lifecycle', url: '/sdk/examples/utilities/download-lifecycle', type: 'page', icon: resolveIcon('Download') },
+          { name: 'Runtime lifecycle', url: '/sdk/examples/utilities/runtime-lifecycle', type: 'page', icon: resolveIcon('Moon') },
           { name: 'Sharded models', url: '/sdk/examples/utilities/sharded-models', type: 'page', icon: resolveIcon('Merge') },
         ],
       },

From f81772c5dfe47f652800a3328a9f3f5e48403802 Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Tue, 28 Apr 2026 23:25:22 -0300
Subject: [PATCH 4/8] doc: content update - SDK - diffusion - add img2img gen

---
 .../examples/ai-tasks/image-generation.mdx    | 33 +++++++++++++++++--
 .../(latest)/sdk/getting-started/index.mdx    |  2 +-
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
index 3560596a1c..6eaf7d4243 100644
--- a/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
+++ b/docs/website/content/docs/(latest)/sdk/examples/ai-tasks/image-generation.mdx
@@ -1,11 +1,13 @@
 ---
 title: Image generation
-description: Text-to-image generation using Stable Diffusion.
+description: Text-to-image and image-to-image generation using Stable Diffusion.
 ---
 
 ## Overview
 
-Image generation uses [`qvac-ext-stable-diffusion.cpp`](https://github.com/tetherto/qvac-ext-stable-diffusion.cpp) as the inference engine. Load a supported model using `modelType: "diffusion"`. Then, provide a text `prompt` describing the image to generate. 
+Image generation uses [`qvac-ext-stable-diffusion.cpp`](https://github.com/tetherto/qvac-ext-stable-diffusion.cpp) as the inference engine. Load a supported model using `modelType: "diffusion"`. Then, provide a text `prompt` describing the image to generate.
+
+For image-to-image, also pass `init_image` (a `Uint8Array` of PNG bytes) — the model transforms the input guided by the prompt instead of starting from noise.
 
 `diffusion()` returns one or more PNG images as `Uint8Array` buffers. Use `progressStream` to track generation progress step-by-step.
 
@@ -74,6 +76,33 @@ The following script shows text-to-image generation using FLUX.2-klein with its
 </Tab>
 </Tabs>
 
+### Image-to-image
+
+Pass `init_image` to transform an existing image guided by a text prompt. Behavior depends on the model family:
+
+- **SD / SDXL / SD3**: SDEdit-style. Use `strength` to control how much the source is preserved (`0` = keep source, `1` = ignore source).
+- **FLUX.2**: in-context conditioning. Requires `prediction: "flux2_flow"` in `modelConfig` at `loadModel()` time; `strength` is ignored on this path.
+
+The following script loads an SD 2.1 model and transforms an input image using `strength: 0.5`:
+
+<Tabs>
+<Tab value="js" label="JavaScript" default>
+<WrapCode>
+
+```js file=<rootDir>/packages/sdk/dist/examples/diffusion-img2img.js title="diffusion-img2img.js" lineNumbers
+```
+</WrapCode>
+</Tab>
+
+<Tab value="ts" label="TypeScript">
+<WrapCode>
+
+```ts file=<rootDir>/packages/sdk/examples/diffusion-img2img.ts title="diffusion-img2img.ts" lineNumbers
+```
+</WrapCode>
+</Tab>
+</Tabs>
+
 <Callout type="success">
 **Tip:** all examples throughout this documentation are self-contained and runnable. For instructions on how to run them, see [SDK quickstart](/sdk/getting-started/quickstart).
 </Callout>
diff --git a/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx b/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
index 3b9b8c5971..9946a71ecb 100644
--- a/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
+++ b/docs/website/content/docs/(latest)/sdk/getting-started/index.mdx
@@ -51,7 +51,7 @@ The JS SDK is cross-platform, type-safe, and pluggable, exposing all QVAC capabi
 * [**Transcription:**](/sdk/examples/ai-tasks/transcription) automatic speech recognition (ASR) for speech-to-text via [`qvac-ext-lib-whisper.cpp`](https://github.com/tetherto/qvac-ext-lib-whisper.cpp) or [NVIDIA Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2).
 * [**Text-to-Speech:**](/sdk/examples/ai-tasks/text-to-speech) speech synthesis for text-to-speech (TTS) via [ONNX Runtime](https://onnxruntime.ai).
 * [**OCR:**](/sdk/examples/ai-tasks/ocr) optical character recognition (OCR) for extracting text from images via ONNX runtime.
-* [**Image generation:**](/sdk/examples/ai-tasks/image-generation) text-to-image generation via [`qvac-ext-stable-diffusion.cpp`](https://github.com/tetherto/qvac-ext-stable-diffusion.cpp).
+* [**Image generation:**](/sdk/examples/ai-tasks/image-generation) text-to-image and image-to-image generation via [`qvac-ext-stable-diffusion.cpp`](https://github.com/tetherto/qvac-ext-stable-diffusion.cpp).
 * [**Multimodal:**](/sdk/examples/ai-tasks/multimodal) LLM inference over text, images, and other media within a single conversation context.
 * [**Fine-tuning:**](/sdk/examples/ai-tasks/fine-tuning) adapting LLMs to domain-specific tasks via LoRA.
 * [**RAG:**](/sdk/examples/ai-tasks/rag) out-of-the-box retrieval-augmented generation workflow.

From c2018fd4cfdd3d03bf3c69680c772ebab6ec0be2 Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Wed, 29 Apr 2026 12:06:28 -0300
Subject: [PATCH 5/8] doc: SDK - custom plugin - drop `loader: null` from createModel example (replaces PR 1735)

---
 .../(latest)/sdk/examples/utilities/write-custom-plugin.mdx     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/website/content/docs/(latest)/sdk/examples/utilities/write-custom-plugin.mdx b/docs/website/content/docs/(latest)/sdk/examples/utilities/write-custom-plugin.mdx
index 8768519020..de31fb386f 100644
--- a/docs/website/content/docs/(latest)/sdk/examples/utilities/write-custom-plugin.mdx
+++ b/docs/website/content/docs/(latest)/sdk/examples/utilities/write-custom-plugin.mdx
@@ -133,7 +133,7 @@ export const echoPlugin = definePlugin({
 
   createModel: (params: CreateModelParams): PluginModelResult => {
     const model = { id: params.modelId, load: async () => {} };
-    return { model, loader: null };
+    return { model };
   },
 
   handlers: {

From c5b7d8712f8f986f35817b2ebd92f444221f9e3e Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Wed, 29 Apr 2026 12:47:12 -0300
Subject: [PATCH 6/8] doc: new code example - SDK - img gen img2img with flux2

---
 .../examples/ai-tasks/image-generation.mdx    | 28 +++----
 .../examples/diffusion-flux2-klein-img2img.ts | 77 +++++++++++++++++++
 2 files changed, 92 insertions(+), 13 deletions(-)
 create mode 100644 packages/sdk/examples/diffusion-flux2-klein-img2img.ts

diff --git a/docs/website/content/docs/sdk/examples/ai-tasks/image-generation.mdx b/docs/website/content/docs/sdk/examples/ai-tasks/image-generation.mdx
index 31b83b1381..34bb66324b 100644
--- a/docs/website/content/docs/sdk/examples/ai-tasks/image-generation.mdx
+++ b/docs/website/content/docs/sdk/examples/ai-tasks/image-generation.mdx
@@ -24,23 +24,23 @@ For how to use each function, see [SDK — API reference](/sdk/api/).
 
 Supported model families and their file layouts:
 
+- **FLUX.2-klein**: split layout — diffusion model `*.gguf` + LLM text encoder `*.gguf` (via `llmModelSrc`) + VAE `*.safetensors` (via `vaeModelSrc`).
 - **SD1.x, SD2.x**: single all-in-one `*.gguf` file. No companion files needed.
 - **SDXL, SD3**: may require separate CLIP/T5 text encoder files (`clipLModelSrc`, `clipGModelSrc`, `t5XxlModelSrc`) in `modelConfig` depending on the model variant.
-- **FLUX.2-klein**: split layout — diffusion model `*.gguf` + LLM text encoder `*.gguf` (via `llmModelSrc`) + VAE `*.safetensors` (via `vaeModelSrc`).
 
 For models available as constants, see [SDK — Models](/sdk/getting-started#models).
 
 ## Examples
 
-### Stable Diffusion
+### FLUX.2-klein
 
-The following script shows a minimal text-to-image generation example using a single all-in-one SD 2.1 model:
+The following script shows text-to-image generation using FLUX.2-klein with its split-layout model (separate diffusion model, LLM text encoder, and VAE):
 
 <Tabs>
 <Tab value="js" label="JavaScript" default>
 <WrapCode>
 
-```js file=<rootDir>/packages/sdk/dist/examples/diffusion-simple.js title="diffusion-simple.js" lineNumbers
+```js file=<rootDir>/packages/sdk/dist/examples/diffusion-flux2-klein.js title="diffusion-flux2-klein.js" lineNumbers
 ```
 </WrapCode>
 </Tab>
@@ -48,21 +48,21 @@ The following script shows a minimal text-to-image generation example using a si
 <Tab value="ts" label="TypeScript">
 <WrapCode>
 
-```ts file=<rootDir>/packages/sdk/examples/diffusion-simple.ts title="diffusion-simple.ts" lineNumbers
+```ts file=<rootDir>/packages/sdk/examples/diffusion-flux2-klein.ts title="diffusion-flux2-klein.ts" lineNumbers
 ```
 </WrapCode>
 </Tab>
 </Tabs>
 
-### FLUX.2-klein
+### Stable Diffusion
 
-The following script shows text-to-image generation using FLUX.2-klein with its split-layout model (separate diffusion model, LLM text encoder, and VAE):
+The following script shows a minimal text-to-image generation example using a single all-in-one SD 2.1 model:
 
 <Tabs>
 <Tab value="js" label="JavaScript" default>
 <WrapCode>
 
-```js file=<rootDir>/packages/sdk/dist/examples/diffusion-flux2-klein.js title="diffusion-flux2-klein.js" lineNumbers
+```js file=<rootDir>/packages/sdk/dist/examples/diffusion-simple.js title="diffusion-simple.js" lineNumbers
 ```
 </WrapCode>
 </Tab>
@@ -70,26 +70,28 @@ The following script shows text-to-image generation using FLUX.2-klein with its
 <Tab value="ts" label="TypeScript">
 <WrapCode>
 
-```ts file=<rootDir>/packages/sdk/examples/diffusion-flux2-klein.ts title="diffusion-flux2-klein.ts" lineNumbers
+```ts file=<rootDir>/packages/sdk/examples/diffusion-simple.ts title="diffusion-simple.ts" lineNumbers
 ```
 </WrapCode>
 </Tab>
 </Tabs>
 
+
+
 ### Image-to-image
 
 Pass `init_image` to transform an existing image guided by a text prompt. Behavior depends on the model family:
 
-- **SD / SDXL / SD3**: SDEdit-style. Use `strength` to control how much the source is preserved (`0` = keep source, `1` = ignore source).
 - **FLUX.2**: in-context conditioning. Requires `prediction: "flux2_flow"` in `modelConfig` at `loadModel()` time; `strength` is ignored on this path.
+- **SD / SDXL / SD3**: SDEdit-style. Use `strength` to control how much the source is preserved (`0` = keep source, `1` = ignore source).
 
-The following script loads an SD 2.1 model and transforms an input image using `strength: 0.5`:
+The following script loads FLUX.2-klein in split-layout and transforms an input image using in-context conditioning (`prediction: "flux2_flow"`):
 
 <Tabs>
 <Tab value="js" label="JavaScript" default>
 <WrapCode>
 
-```js file=<rootDir>/packages/sdk/dist/examples/diffusion-img2img.js title="diffusion-img2img.js" lineNumbers
+```js file=<rootDir>/packages/sdk/dist/examples/diffusion-flux2-klein-img2img.js title="diffusion-flux2-klein-img2img.js" lineNumbers
 ```
 </WrapCode>
 </Tab>
@@ -97,7 +99,7 @@ The following script loads an SD 2.1 model and transforms an input image using `
 <Tab value="ts" label="TypeScript">
 <WrapCode>
 
-```ts file=<rootDir>/packages/sdk/examples/diffusion-img2img.ts title="diffusion-img2img.ts" lineNumbers
+```ts file=<rootDir>/packages/sdk/examples/diffusion-flux2-klein-img2img.ts title="diffusion-flux2-klein-img2img.ts" lineNumbers
 ```
 </WrapCode>
 </Tab>
diff --git a/packages/sdk/examples/diffusion-flux2-klein-img2img.ts b/packages/sdk/examples/diffusion-flux2-klein-img2img.ts
new file mode 100644
index 0000000000..65eccf96d4
--- /dev/null
+++ b/packages/sdk/examples/diffusion-flux2-klein-img2img.ts
@@ -0,0 +1,77 @@
+import {
+  loadModel,
+  unloadModel,
+  diffusion,
+  FLUX_2_KLEIN_4B_Q8_0,
+  FLUX_2_KLEIN_4B_VAE,
+  QWEN3_4B_Q4_K_M,
+} from "@qvac/sdk";
+import fs from "fs";
+import path from "path";
+
+// img2img with FLUX.2 [klein] split-layout (diffusion model + LLM text encoder + VAE) — uses in-context conditioning ("flux2_flow"): reads <inputImage> and writes flux2_img2img_<i>.png files into outputDir.
+
+const inputPath = process.argv[2];
+const prompt = process.argv[3] || "oil painting style, vibrant colors";
+const outputDir = process.argv[4] || ".";
+const diffusionModelSrc = process.argv[5] || FLUX_2_KLEIN_4B_Q8_0;
+const llmModelSrc = process.argv[6] || QWEN3_4B_Q4_K_M;
+const vaeModelSrc = process.argv[7] || FLUX_2_KLEIN_4B_VAE;
+
+if (!inputPath) { // inputImage is the only required CLI argument; every other argument has a default
+  console.error("❌ Error: input image path is required");
+  console.error(
+    "Usage: bun run bare:example dist/examples/diffusion-flux2-klein-img2img.js <inputImage> [prompt] [outputDir] [diffusionModelSrc] [llmModelSrc] [vaeModelSrc]",
+  );
+  process.exit(1);
+}
+
+try {
+  console.log("Loading FLUX.2 [klein] split-layout model...");
+  const modelId = await loadModel({
+    modelSrc: diffusionModelSrc,
+    modelType: "diffusion",
+    modelConfig: {
+      device: "gpu",
+      threads: 4,
+      llmModelSrc,
+      vaeModelSrc,
+      prediction: "flux2_flow", // FLUX.2 img2img (in-context conditioning) needs this set at load time
+    },
+    onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
+  });
+  console.log(`Model loaded: ${modelId}`);
+
+  const init_image = new Uint8Array(fs.readFileSync(inputPath)); // raw file bytes of the source image, passed to diffusion() unchanged
+  console.log(`\nTransforming "${inputPath}" with prompt: "${prompt}"`);
+
+  const { progressStream, outputs, stats } = diffusion({
+    modelId,
+    prompt,
+    init_image,
+    steps: 20,
+    guidance: 3.5,
+    cfg_scale: 1,
+    seed: -1, // NOTE(review): presumably -1 requests a random seed — confirm against the diffusion() API
+  });
+
+  for await (const { step, totalSteps } of progressStream) { // one iteration per progress event emitted by diffusion()
+    process.stdout.write(`\rStep ${step}/${totalSteps}`); // "\r" rewrites the same console line instead of scrolling
+  }
+  console.log(); // end the in-place progress line with a newline
+
+  const buffers = await outputs; // resolves to the generated image buffer(s), written below as .png files
+  for (let i = 0; i < buffers.length; i++) {
+    const outputPath = path.join(outputDir, `flux2_img2img_${i}.png`);
+    fs.writeFileSync(outputPath, buffers[i]!); // "!" is safe here: i < buffers.length
+    console.log(`Saved: ${outputPath}`);
+  }
+
+  console.log("\nStats:", await stats);
+  await unloadModel({ modelId, clearStorage: false }); // NOTE(review): clearStorage: false presumably keeps downloaded model files on disk — confirm
+  console.log("Done.");
+  process.exit(0); // NOTE(review): explicit exit, presumably because the runtime would otherwise keep the process alive — confirm
+} catch (error) {
+  console.error("❌ Error:", error);
+  process.exit(1);
+}

From 6350939d823f433fcbdbe21a753d28d9622be359 Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Wed, 29 Apr 2026 12:51:01 -0300
Subject: [PATCH 7/8] doc: fix broken link in completion page

---
 docs/website/content/docs/sdk/examples/ai-tasks/completion.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/website/content/docs/sdk/examples/ai-tasks/completion.mdx b/docs/website/content/docs/sdk/examples/ai-tasks/completion.mdx
index f5550c12dd..f8bc813b30 100644
--- a/docs/website/content/docs/sdk/examples/ai-tasks/completion.mdx
+++ b/docs/website/content/docs/sdk/examples/ai-tasks/completion.mdx
@@ -33,7 +33,7 @@ You can load any [`llama.cpp`](https://github.com/ggml-org/llama.cpp)-compatible
 
 ## Features
 
-* Event stream: `completion()` exposes a single ordered `events` async iterable plus an aggregated `final` promise. Events are discriminated by `type` — `contentDelta`, `thinkingDelta`, `toolCall`, `toolError`, `completionStats`, `completionDone`, `rawDelta` — see [`completion()`](/sdk/api/completion) for the full schema.
+* Event stream: `completion()` exposes a single ordered `events` async iterable plus an aggregated `final` promise. Events are discriminated by `type` — `contentDelta`, `thinkingDelta`, `toolCall`, `toolError`, `completionStats`, `completionDone`, `rawDelta`.
 * Thinking content: models that emit `<think>` blocks surface them as dedicated `thinkingDelta` events (enable with `captureThinking: true`), so consumers don't have to parse tags from raw text.
 * Tool calls: the model emits structured tool calls as `toolCall` events ordered alongside content and thinking in the same stream.
 * MCP: plug MCP servers into `completion()` so the model can use external tools (e.g., web search) via the same tool-call mechanism.

From ea33a75a5cd06cd560540c607671d50aedcaa386 Mon Sep 17 00:00:00 2001
From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com>
Date: Wed, 29 Apr 2026 13:50:59 -0300
Subject: [PATCH 8/8] doc: sdk - img2img klein example - default to Q4_0 diffusion model

---
 packages/sdk/examples/diffusion-flux2-klein-img2img.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/sdk/examples/diffusion-flux2-klein-img2img.ts b/packages/sdk/examples/diffusion-flux2-klein-img2img.ts
index 65eccf96d4..41c8c06047 100644
--- a/packages/sdk/examples/diffusion-flux2-klein-img2img.ts
+++ b/packages/sdk/examples/diffusion-flux2-klein-img2img.ts
@@ -2,7 +2,7 @@ import {
   loadModel,
   unloadModel,
   diffusion,
-  FLUX_2_KLEIN_4B_Q8_0,
+  FLUX_2_KLEIN_4B_Q4_0,
   FLUX_2_KLEIN_4B_VAE,
   QWEN3_4B_Q4_K_M,
 } from "@qvac/sdk";
@@ -14,7 +14,7 @@ import path from "path";
 const inputPath = process.argv[2];
 const prompt = process.argv[3] || "oil painting style, vibrant colors";
 const outputDir = process.argv[4] || ".";
-const diffusionModelSrc = process.argv[5] || FLUX_2_KLEIN_4B_Q8_0;
+const diffusionModelSrc = process.argv[5] || FLUX_2_KLEIN_4B_Q4_0;
 const llmModelSrc = process.argv[6] || QWEN3_4B_Q4_K_M;
 const vaeModelSrc = process.argv[7] || FLUX_2_KLEIN_4B_VAE;