From e7603fdb62e8f8bd818326b77501181b745a5a51 Mon Sep 17 00:00:00 2001
From: namelsking <functionsilence@gmail.com>
Date: Fri, 1 May 2026 16:09:16 +0200
Subject: [PATCH] feat[skiplog]: improve sdk-changelog tooling and add 0.10.0
 release notes

Tooling (scripts/sdk/generate-changelog-sdk-pod.cjs):
- Backmerge filter: PRs whose subject starts with `Backmerge` or
  `Merge release ...` are skipped during processSDKPRs (same shape as
  the existing [skiplog] filter).
- Companion filter + entry-count strip: new isCompanionEntry,
  stripEntryCount, cleanModelEntries helpers applied to the inline
  [mod] summary in CHANGELOG.md and the body of models.md. Recognises
  *_LEX / *_VOCAB / *_DATA / *_METADATA constant suffixes and any
  line containing the word "companion".
- Indented continuation lines for [mod] PRs: Added/Updated/Removed
  are emitted as indented sub-rows under the bullet (capped at
  MAX_INLINE_MODELS = 5 per section, "(and N more)" for the rest)
  instead of stuffed inline.
- Announcement-post generator: new --generate-announcement-post CLI
  flag (with optional --version) parses CHANGELOG.md via
  parseChangelogMarkdown and emits the Slack template (:qvac: header,
  NPM/GitHub/changelog links, conditional :warning: Breaking Changes,
  per-section bullets with <url> link wrapping and :boom: breaking
  markers, footer). Sections cap at MAX_ANNOUNCEMENT_BULLETS = 10
  with "... And much more, see full list in changelog :memo:" only
  when strictly more than 10.
- New helpers exported: parseChangelogMarkdown, generateAnnouncementPost.

Skill (.cursor/skills/sdk-changelog/SKILL.md):
- Step 4 (CHANGELOG_LLM.md) is now mandatory.
- New Step 5: generate announcement-post.txt (mandatory) with the
  gitignore note and template spec.
- NOTICE renumbered to Step 6.
- Documented all new policies (backmerge, companion, entry-count
  strip, indentation, max-bullets cap).
- CLI parameters table refreshed.

.gitignore:
- Added packages/*/changelog/*/announcement-post.txt. The post is a
  Slack copy-paste working artifact, not a release deliverable.

Release notes for 0.10.0:
- New packages/sdk/changelog/0.10.0/ folder with CHANGELOG.md,
  breaking.md, api.md, models.md, CHANGELOG_LLM.md.
- Root aggregate packages/sdk/CHANGELOG.md rebuilt with v0.10.0 at
  top.
- packages/sdk/NOTICE refreshed (191 models, 179 JS deps).
- packages/sdk/package.json bumped 0.9.1 -> 0.10.0.

Backmerge of release-sdk-0.10.0 -> main is a no-op for the release
artifacts (changelog, NOTICE) because they land here directly.
---
 .cursor/skills/sdk-changelog/SKILL.md         | 149 +++++-
 .gitignore                                    |   6 +
 packages/sdk/CHANGELOG.md                     | 444 ++++++++++++++++
 packages/sdk/NOTICE                           |  69 +--
 packages/sdk/changelog/0.10.0/CHANGELOG.md    |  63 +++
 .../sdk/changelog/0.10.0/CHANGELOG_LLM.md     | 443 ++++++++++++++++
 packages/sdk/changelog/0.10.0/api.md          | 501 ++++++++++++++++++
 packages/sdk/changelog/0.10.0/breaking.md     | 278 ++++++++++
 packages/sdk/changelog/0.10.0/models.md       |  20 +
 packages/sdk/package.json                     |   2 +-
 scripts/sdk/generate-changelog-sdk-pod.cjs    | 464 +++++++++++++++-
 11 files changed, 2376 insertions(+), 63 deletions(-)
 create mode 100644 packages/sdk/changelog/0.10.0/CHANGELOG.md
 create mode 100644 packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md
 create mode 100644 packages/sdk/changelog/0.10.0/api.md
 create mode 100644 packages/sdk/changelog/0.10.0/breaking.md
 create mode 100644 packages/sdk/changelog/0.10.0/models.md
diff --git a/.cursor/skills/sdk-changelog/SKILL.md b/.cursor/skills/sdk-changelog/SKILL.md
index 126b51ca64..721741c333 100644
--- a/.cursor/skills/sdk-changelog/SKILL.md
+++ b/.cursor/skills/sdk-changelog/SKILL.md
@@ -21,6 +21,9 @@ Generate changelogs for SDK pod packages following the monorepo GitFlow.
 
 ## Workflow
 
+Every step is mandatory. Do **not** ask the user whether to generate `CHANGELOG_LLM.md`,
+`announcement-post.txt`, or `NOTICE` — they are part of this skill and always run.
+
 ### Step 1: Identify Target Package
 
 If the user doesn't specify, ask which SDK pod package they want to generate a changelog for.
@@ -56,20 +59,126 @@ With migration flags:
 node scripts/sdk/generate-changelog-sdk-pod.cjs --package=<name> --base-commit=<sha> --base-version=<version>
 ```
 
-### Step 4: Generate CHANGELOG_LLM.md (if requested)
+The script automatically excludes:
+
+- PRs tagged `[skiplog]`.
+- **Backmerge PRs** (subjects starting with `Backmerge` or `Merge release …`).
+  Backmerges merge a release branch back into main; their content is already
+  documented in the release branch's own changelog, so listing them here is noise.
+- PRs whose title fails the SDK PR-format validator (a warning is emitted rather than
+  dropping them silently — fix the title and re-run, or raise it with the PR author).
+
+For `[mod]` PRs, the script extracts the `Added`/`Updated`/`Removed` model lists
+from the PR body and renders them as **indented continuation lines beneath the
+bullet** in `CHANGELOG.md` (each section on its own line — never inline as one
+giant row). The same filtered lists are written to `models.md`.
+
+The extractor applies two policies (in this order):
+
+1. **Companion entries are dropped.** Companions are auxiliary files that ship
+   alongside a primary model but aren't independently usable — vocab files,
+   lexicons, raw data shards, metadata blobs. The filter recognises constant
+   suffixes (`*_LEX`, `*_VOCAB`, `*_DATA`, `*_METADATA`) **and** any free-form
+   description containing the word "companion". Only first-class models reach
+   the changelog.
+2. **Entry-count suffixes are stripped.** `(N entries)` /
+   `(N entries — short note)` decorations are removed from the displayed
+   text — readers can follow the `models.md` link for exact counts.
+
+After both filters, each section is trimmed to `MAX_INLINE_MODELS` (currently
+**5**) entries, with `(and N more)` for the remainder. Example:
+
+```
+- Regenerate model registry. (see PR [#123](...)) - See [model changes](./models.md)
+  Added: NMT_Q0F16, NMT_Q4_0 (and 12 more)
+  Removed: MARIAN_OPUS_*
+```
+
+If a section is empty after filtering, it is omitted. If all sections are empty,
+the bullet is emitted with no continuation lines.
+
+When writing the human-readable `CHANGELOG_LLM.md` (Step 4), apply the same
+"no informational value" rule manually: skip backmerges, automated bumps, and any
+entry whose subject would just repeat what a previous release already said. For
+the Models section, mirror the script's policy — keep it concise in the body
+(highlight the most notable adds/removes) and defer the full constant list to
+the `### Added` / `### Removed` blocks at the bottom.
+
+### Step 4: Generate CHANGELOG_LLM.md (mandatory)
+
+Always run this step. Do not ask the user — it's part of the skill.
+
+After raw changelog files exist, generate the human-readable version at
+`packages/<package>/changelog/<version>/CHANGELOG_LLM.md`.
 
-After raw changelog files exist, generate the human-readable version.
 See [references/changelog-llm-format.md](references/changelog-llm-format.md) for the format guide.
 
+After writing the file, re-run the raw generator (or rebuild the root aggregate) so
+`packages/<package>/CHANGELOG.md` picks up the new `CHANGELOG_LLM.md` (the aggregator
+prefers it over `CHANGELOG.md`). Easiest way: re-run the script from Step 3 — it's idempotent.
+
+### Step 5: Generate `announcement-post.txt` (mandatory)
+
+Always run this step after Step 4. It produces a Slack-ready copy-paste post at
+`packages/<package>/changelog/<version>/announcement-post.txt`.
+
+The file is **gitignored** (`packages/*/changelog/*/announcement-post.txt`) — it's a
+local working artifact, not a committed deliverable. Never `git add` it.
+
+```bash
+node scripts/sdk/generate-changelog-sdk-pod.cjs --package=<name> --generate-announcement-post
+```
+
+The script parses `CHANGELOG.md` for the package's current version (from
+`package.json`) and emits the Slack template:
+
+- `:qvac: SDK <version> :rocket: NPM Public release` header.
+- NPM, GitHub release, and full-changelog tree links.
+- `:warning: Breaking Changes` section (with link to `breaking.md`) — only if any
+  PR is breaking.
+- `Release Date: YYYY-MM-DD`.
+- One Slack section per CHANGELOG.md section (`:sparkles: Features`,
+  `:electric_plug: API`, `:ladybug: Fixes`, `:package: Models`, `:blue_book: Docs`,
+  `:test_tube: Tests`, `:broom: Chores`, `:gear: Infrastructure`).
+- Each bullet uses `•`, wraps the PR URL in `<...>` (suppresses Slack unfurl), and
+  appends ` :boom: breaking` when the bullet is breaking.
+- Sections are capped at `MAX_ANNOUNCEMENT_BULLETS` (currently **10**). The
+  `... And much more, see full list in changelog :memo:` line is only added
+  when a section has *more than 10* entries; anything 10 or fewer is emitted
+  verbatim.
+- Footer: `Thanks to everyone on QVAC team :green_heart: :qvac: :green_heart:`.
+
+If the post needs hand-tuning (e.g. the Models section needs custom count summaries
+that the parser can't infer), edit the file directly. It's gitignored, so changes
+won't pollute the diff.
+
+### Step 6: Update NOTICE file for the target package
+
+After Step 5 completes, run notice-generate for the same `--package` to ensure
+its NOTICE file reflects any dependency changes in the release:
+
+```bash
+source .env
+node .cursor/skills/notice-generate/scripts/generate-notice.js <package-name>
+```
+
+Do NOT commit the announcement post (it is gitignored). Leave all other changes
+uncommitted so the user can review and commit them.
+
+See `.cursor/skills/notice-generate/SKILL.md` for full details.
+
 ## CLI Parameters
 
-| Flag             | Required | Description                                                        |
-| ---------------- | -------- | ------------------------------------------------------------------ |
-| `--package`      | Yes      | Package name (e.g., `sdk`)                                         |
-| `--base-commit`  | No       | Initial commit SHA for migration (overrides tag lookup)            |
-| `--base-version` | No       | Version label for base commit (display only)                       |
-| `--release-type` | No       | `minor` or `patch` (auto-detected from package.json version)       |
-| `--dry-run`      | No       | Preview output without writing files                               |
+| Flag                            | Required | Description                                                        |
+| ------------------------------- | -------- | ------------------------------------------------------------------ |
+| `--package`                     | Yes      | Package name (e.g., `sdk`)                                         |
+| `--base-commit`                 | No       | Initial commit SHA for migration (overrides tag lookup)            |
+| `--base-version`                | No       | Version label for base commit (display only)                       |
+| `--release-type`                | No       | `minor` or `patch` (auto-detected from package.json version)       |
+| `--dry-run`                     | No       | Preview output without writing files                               |
+| `--update-root-changelog`       | No       | Rebuild only the root aggregate `packages/<pkg>/CHANGELOG.md`      |
+| `--generate-announcement-post`  | No       | Generate `announcement-post.txt` for the package's current version |
+| `--version`                     | No       | Override version when used with `--generate-announcement-post`     |
 
 ## Output
 
@@ -79,7 +188,9 @@ Generates changelog files in `packages/<package>/changelog/<version>/`:
 - `breaking.md` - Breaking changes detail (if `[bc]` PRs)
 - `api.md` - API changes detail (if `[api]` PRs)
 - `models.md` - Model changes (if `[mod]` PRs)
-- `CHANGELOG_LLM.md` - Human-readable version (generated separately via Step 4)
+- `CHANGELOG_LLM.md` - Human-readable version (always generated, see Step 4)
+- `announcement-post.txt` - Slack copy-paste post (always generated, see Step 5,
+  **gitignored** — never commit)
 
 Additionally:
 
@@ -95,19 +206,6 @@ Examples:
 - `sdk-v0.8.1` (patch — used as base for next patch release)
 - `rag-v2.0.0`
 
-### Step 5: Update NOTICE file for the target package
-
-After changelog generation completes, run notice-generate for the same `--package` to ensure its NOTICE file reflects any dependency changes in the release:
-
-```bash
-source .env
-node .cursor/skills/notice-generate/scripts/generate-notice.js <package-name>
-```
-
-Do NOT commit — the user will review and commit.
-
-See `.cursor/skills/notice-generate/SKILL.md` for full details.
-
 ## Quality Checklist
 
 Before completing:
@@ -116,9 +214,10 @@ Before completing:
 - [ ] Base reference resolved (tag or `--base-commit`)
 - [ ] PRs scoped to package path only
 - [ ] Changelog files written to correct version directory
-- [ ] If CHANGELOG_LLM.md requested, follows format guide
+- [ ] CHANGELOG_LLM.md generated (mandatory) and follows format guide
+- [ ] announcement-post.txt generated (mandatory, gitignored)
 - [ ] NOTICE file updated for the target package
-- [ ] Root CHANGELOG.md rebuilt from all version folders
+- [ ] Root CHANGELOG.md rebuilt from all version folders (and picks up CHANGELOG_LLM.md)
 - [ ] Versions sorted in descending semver order
 - [ ] No duplicated versions
 - [ ] Root file is deterministic (fully regenerated)
diff --git a/.gitignore b/.gitignore
index a220715d88..ad7677aca3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,12 @@ packages/**/.npmrc
 packages/sdk/bun.lock
 NOTICE_LOG.txt
 NOTICE_FULL_REPORT.txt
+
+# Slack/Discord copy-paste announcement posts generated by the
+# changelog skill (see scripts/sdk/generate-changelog-sdk-pod.cjs
+# --generate-announcement-post). These are local working artifacts,
+# not release deliverables — never commit them.
+packages/*/changelog/*/announcement-post.txt
 .claude/worktrees
 
 # Auto-generated agent config (run /setup to regenerate)
diff --git a/packages/sdk/CHANGELOG.md b/packages/sdk/CHANGELOG.md
index d7eb342a25..b7ad3bc6a4 100644
--- a/packages/sdk/CHANGELOG.md
+++ b/packages/sdk/CHANGELOG.md
@@ -1,5 +1,449 @@
 # Changelog
 
+## [0.10.0]
+
+📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.10.0
+
+This release lands a redesigned completion API built on a unified event stream, a generic
+companion-set system that handles multi-file models in parallel, and a much stronger model
+type/capability system that catches mis-routed calls at compile time. It also rewires
+delegated inference to direct DHT connections, expands the addon surface (img2img,
+structured output, dynamic tools, tool dialects, per-segment whisper metadata,
+sentence-streaming TTS), and reshapes the model registry around companion sets.
+
+## Breaking Changes
+
+### Unified `CompletionEvent` stream
+
+`completion()` now returns a `CompletionRun` with a single canonical `events` stream that
+carries content, thinking, tool calls, stats, and completion in a single ordered
+sequence. The legacy `tokenStream`/`stats` fields still work as derived views, but the
+event stream is the authoritative API going forward and is what enables features like
+captured thinking and structured tool framing.
+
+**Before:**
+
+```typescript
+const result = completion({ modelId, history, stream: true });
+for await (const token of result.tokenStream) { /* ... */ }
+const stats = await result.stats;
+```
+
+**After:**
+
+```typescript
+const run = completion({ modelId, history, stream: true, captureThinking: true });
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+  if (event.type === "toolCall") console.log(event.call.name);
+}
+const result = await run.final;
+// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText
+```
+
+### Model type & capability system overhaul
+
+`LoadModelOptions` is no longer a single catch-all. Custom plugins must use the new
+`LoadCustomPluginModelOptions<"plugin-name">` generic so the literal plugin string is
+pinned at the type level. Built-in model types continue to pick the right overload
+automatically when the annotation is dropped.
+
+At runtime, built-in SDK operations now throw `MODEL_OPERATION_NOT_SUPPORTED` when called
+against the wrong model type — with a message that lists the requested operation, the
+loaded model's type, and the supported operations on it. The lower-level `pluginInvoke`
+and `pluginInvokeStream` paths still surface `PLUGIN_HANDLER_NOT_FOUND` as before.
+
+`translate(...)` now routes by the loaded model's registered type. Passing a mismatched
+`modelType` throws `ModelTypeMismatchError` instead of silently mis-routing the call.
+
+**Before:**
+
+```typescript
+import type { LoadModelOptions } from "@qvac/sdk";
+
+const opts: LoadModelOptions = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+```
+
+**After:**
+
+```typescript
+import type { LoadCustomPluginModelOptions } from "@qvac/sdk";
+
+const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+// Or just drop the annotation — TS picks the right overload.
+```
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) {
+    // Includes requested operation, loaded model type, supported operations,
+    // and suggested model types.
+  }
+}
+```
+
+### Companion-set download progress field
+
+Multi-file model downloads (ONNX, future formats) now report progress through a generic
+`fileSetInfo` field instead of the ONNX-specific `onnxInfo`. The shape is identical, only
+the field name changed.
+
+**Before:**
+
+```typescript
+onProgress: (progress) => {
+  if (progress.onnxInfo) {
+    console.log(`[${progress.onnxInfo.currentFile}] ${progress.onnxInfo.overallPercentage.toFixed(1)}%`);
+  }
+}
+```
+
+**After:**
+
+```typescript
+onProgress: (progress) => {
+  if (progress.fileSetInfo) {
+    console.log(`[${progress.fileSetInfo.currentFile}] ${progress.fileSetInfo.overallPercentage.toFixed(1)}%`);
+  }
+}
+```
+
+### Delegated inference uses direct DHT connect
+
+Delegation no longer uses a shared-topic rendezvous. Consumers connect directly to a
+provider's public key via `swarm.dht.connect(publicKey)`, and providers bind the DHT
+server with `swarm.listen()` instead of announcing a topic. This removes a class of
+discovery-flake failures and shortens connect time. Callers using the high-level
+delegation API see no surface change; integrators driving Hyperswarm directly should
+update their join/listen logic.
+
+### Plugin constructor migration
+
+SDK plugins (`definePlugin`) now use the new addon constructor shape. Plugin authors
+need to migrate their `createModel` implementation to match — the SDK in this release
+ships with all first-party plugins already migrated.
+
+## New APIs and Capabilities
+
+### `getLoadedModelInfo` for runtime introspection
+
+A new `getLoadedModelInfo` API returns metadata for a loaded `modelId`, discriminated on
+`isDelegated`. Local models expose their authoritative handler list and `modelType`;
+delegated models defer to the provider. Useful for preflighting a built-in SDK call
+before issuing the RPC.
+
+```typescript
+import { getLoadedModelInfo, transcribe } from "@qvac/sdk";
+
+const info = await getLoadedModelInfo({ modelId });
+
+if (info.isDelegated || info.handlers.includes("transcribeStream")) {
+  await transcribe({ modelId /* ... */ });
+}
+```
+
+### Structured output (`responseFormat`)
+
+`completion()` now accepts a `responseFormat` option that constrains the model to emit
+schema-valid JSON. The output is guaranteed to parse against the supplied JSON Schema.
+
+```typescript
+const run = completion({
+  modelId,
+  history: [{ role: "user", content: "Extract: I'm Alice, 30, data engineer." }],
+  stream: true,
+  responseFormat: {
+    type: "json_schema",
+    json_schema: {
+      name: "Person",
+      schema: {
+        type: "object",
+        properties: {
+          name: { type: "string" },
+          age: { type: "integer" },
+          occupation: { type: "string" },
+        },
+        required: ["name", "age", "occupation"],
+        additionalProperties: false,
+      },
+    },
+  },
+});
+
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+}
+const final = await run.final;
+JSON.parse(final.contentText); // schema-valid
+```
+
+### Dynamic tools mode
+
+LLM models can now opt into a `dynamic` tools mode at load time. Subsequent
+`completion()` calls can pass an entirely different `tools` array on each turn, and the
+addon trims the previous tool block from the KV cache so rotation is free — no need to
+invalidate the cache or pin the tool set per-session.
+
+```typescript
+import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk";
+
+const modelId = await loadModel({
+  modelSrc: QWEN3_1_7B_INST_Q4,
+  modelType: "llm",
+  modelConfig: {
+    ctx_size: 4096,
+    tools: true,
+    toolsMode: TOOLS_MODE.dynamic,
+  },
+});
+
+// Turn 1 — weather tools.
+const turn1 = completion({
+  modelId, history, kvCache, stream: true,
+  tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }],
+});
+
+// Turn 2 — same kvCache, different tools. Free rotation.
+const turn2 = completion({
+  modelId, history, kvCache, stream: true,
+  tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }],
+});
+```
+
+### Tool-call dialect routing
+
+Tool-call parsing is now dialect-aware. The SDK auto-detects between `hermes`,
+`pythonic`, and `json` framings, and a new `toolDialect` parameter lets you force a
+specific parser when auto-detection picks the wrong path. This is common with Llama 3.x
+fine-tunes that emit native pythonic headers: the auto-router defaults to `hermes` for
+them for empirical reasons.
+
+```typescript
+import { completion, type ToolDialect } from "@qvac/sdk";
+
+const result = completion({
+  modelId, history, tools, stream: true,
+  toolDialect: "pythonic", // "hermes" | "pythonic" | "json"
+});
+```
+
+### img2img for diffusion models
+
+The diffusion API now accepts an `init_image` for SDEdit-style image-to-image on
+SD/SDXL, and in-context conditioning on FLUX.2. `strength` controls how much of the
+source is preserved on SD/SDXL; FLUX.2 ignores it (the path is purely conditional).
+
+```typescript
+const initImage = new Uint8Array(fs.readFileSync("input.png"));
+const { outputs } = diffusion({
+  modelId,
+  prompt: "oil painting style, vibrant colors",
+  init_image: initImage,
+  strength: 0.5, // 0 = keep source, 1 = ignore source
+});
+```
+
+### Sentence-level TTS streaming
+
+ONNX text-to-speech can now stream output one sentence at a time, either as a
+self-contained `textToSpeech({ stream: true, sentenceStream: true })` call or via a
+duplex `textToSpeechStream` session that you can pipe a streaming LLM into. Each chunk
+exposes the int16 PCM samples plus the source sentence and chunk index.
+
+```typescript
+const session = await textToSpeechStream({
+  modelId: ttsModelId,
+  inputType: "text",
+  accumulateSentences: true,
+  sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual"
+  flushAfterMs: 400,
+});
+
+(async () => {
+  for await (const delta of completion({ modelId: llmModelId /* ... */ }).tokenStream) {
+    session.write(delta);
+  }
+  session.end();
+})();
+
+for await (const chunk of session) {
+  // chunk.buffer / chunk.chunkIndex / chunk.sentenceChunk
+  if (chunk.done) break;
+}
+```
+
+### Per-segment whisper metadata
+
+Both `transcribe` (batch) and `transcribeStream` (duplex) now return structured
+`TranscribeSegment` objects with start/end timestamps, segment IDs, and an `append`
+flag — enabling proper subtitle generation and timeline alignment instead of raw text
+concatenation.
+
+```typescript
+const segments = await transcribe({ modelId, audioChunk: audioFilePath, metadata: true });
+for (const s of segments) {
+  console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`);
+}
+```
+
+### Suspend lifecycle gate and `state()`
+
+`suspend()` is now serialized through a lifecycle gate that prevents overlapping
+suspend/resume races. A new `state()` API reports the current lifecycle phase:
+`active`, `suspending`, `suspended`, or `resuming`.
+
+```typescript
+import { state, suspend, resume, type LifecycleState } from "@qvac/sdk";
+
+await suspend();
+const current: LifecycleState = await state();
+if (current !== "active") {
+  await resume();
+}
+```
+
+### Registry download retries and configurable stream timeout
+
+Two new SDK config knobs cover slow/unstable links: `registryDownloadMaxRetries` sets how
+many times `REQUEST_TIMEOUT` failures are retried (`0` disables retries), and
+`registryStreamTimeoutMs` extends the per-block stream timeout beyond the default 60s.
+
+```typescript
+import { setSDKConfig } from "@qvac/sdk";
+
+setSDKConfig({
+  registryDownloadMaxRetries: 5,
+  registryStreamTimeoutMs: 180_000,
+});
+```
+
+### Auto KV-cache: replay the canonical assistant turn
+
+When auto KV-cache is enabled, the completion result now exposes
+`final.cacheableAssistantContent` — the exact assistant string the SDK persisted to the
+cache key on this turn. Push it back into `history` verbatim on the next turn to
+guarantee a cache hit. Tool-call turns aren't auto-cached today and omit the field;
+fall back to `final.contentText` in that case.
+
+```typescript
+const run = completion({ modelId, history, kvCache: true });
+for await (const _ of run.tokenStream) { /* stream */ }
+const final = await run.final;
+const nextHistory = [
+  ...history,
+  { role: "assistant", content: final.cacheableAssistantContent ?? final.contentText },
+  { role: "user", content: "follow-up question" },
+];
+```
+
+### LLM-addon cache API plumbed through SDK
+
+The SDK now wires through the LLM addon's first-class cache API — including explicit
+`deleteCache({ kvCacheKey })` for evicting a named cache key — so consumers can manage
+KV-cache lifetimes alongside `loadModel`/`unloadModel`.
+
+### NMTcpp 2.0.1 surface
+
+The SDK NMT plugin now targets `@qvac/translation-nmtcpp 2.0.1` with a structured
+constructor that distinguishes primary and pivot model files, vocab files, and pivot
+config (beam size, top-k). Bergamot models are also picked up via path-based vocab
+resolution and grouped into companion sets, which lets the cache and download paths
+treat them like any other multi-file model.
+
+## Features
+
+### Parallel orchestration and download dedupe
+
+Model loading is now genuinely parallel where it can be: the primary model and any
+companion files (vision projection, vocab, etc.) download concurrently, and concurrent
+requests for the same asset are deduplicated to a single transfer. Cancellation cleans
+up all active transfers atomically with no leaked state. Profiling fields
+(`sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`,
+`modelInitializationTime`, `checksumValidationTime`) are populated correctly across
+both primary and companion downloads, with aggregate stats merged at the run level.
+
+The companion pipeline is also generic: `companions.ts` is the only format-aware piece,
+and adding a new multi-file format is a matter of dropping in a detection function and
+registering it with `groupCompanionSets`. Everything downstream — codegen, resolver,
+cache probing, storage cleanup — handles it automatically.
+
+### Real-time voice assistant example
+
+A new end-to-end example demonstrates a real-time voice assistant pipeline (whisper →
+LLM → TTS) wired together using the SDK's streaming primitives.
+
+## Bug Fixes
+
+- RPC initialization in the Node runtime now has an explicit timeout, so a wedged
+  transport can no longer hang `loadModel`/`unloadModel` indefinitely.
+- The registry client now opens its corestore with `wait: true`, eliminating a startup
+  race where downloads could begin before replication was ready.
+- KV-cache `savedCount` is no longer incremented on cancelled or zero-token turns,
+  preventing inflated cache stats.
+- `delete-cache` RPC now scopes invalidation to the deleted key only instead of wiping
+  unrelated entries.
+- Delegated transports strip the `__profiling` envelope before zod validation, fixing a
+  spurious validation error when profiling is enabled on the consumer side.
+- Replaced `z.xor` with `z.union` and bumped the zod floor to `^4.3.0` to track upstream
+  breaking changes.
+- LLM-based translation now uses deterministic decoding so the same input produces the
+  same output across runs.
+- Inflight delegation requests that get rejected now run their cleanup chain to
+  completion instead of leaking pending promises.
+
+## Model Registry Changes
+
+The model registry was regenerated around companion-set metadata. The user-facing surface
+is leaner: families that used to live as separate `*_DATA`, `*_LEX`, `*_VOCAB`, and
+`METADATA_*` constants are now companion-only — they're still downloaded, but they're
+not addressable as standalone model sources. Marian Opus models were renamed under the
+`NMT_*` namespace to match the rest of the NMT family.
+
+### Added
+
+```
+NMT_Q0F16 through NMT_Q0F16_9 (10 entries)
+NMT_Q4_0 through NMT_Q4_0_12+ (22 entries)
+```
+
+### Removed (now companion-only or renamed)
+
+```
+*_DATA (32 entries — companion-only, e.g. PARAKEET_TDT_ENCODER_DATA_FP32, TTS_*_DATA)
+BERGAMOT_*_LEX (93 entries — companion-only)
+BERGAMOT_*_VOCAB (93 entries — companion-only)
+BERGAMOT_METADATA_* (87 entries — companion-only)
+MARIAN_OPUS_* (32 entries — renamed to NMT_*)
+```
+
+## Documentation, Tests, and Infrastructure
+
+- Diffusion documentation was extended to cover the new img2img flows (SDEdit on
+  SD/SDXL, in-context conditioning on FLUX.2).
+- Android sharded-model-resume tests no longer trip Scudo OOM — the test harness now
+  bounds memory more conservatively on long-running resume scenarios.
+- The tests-qvac docs, tooling, and CI workflow job names were refreshed for the new
+  suite filtering and PR-triggered e2e workflows. Suite filtering plus PR-trigger labels
+  let CI run targeted SDK e2e subsets on demand instead of always running the full grid.
+- A pre-terminate cleanup hook stabilises mobile smoke tests: the mobile auto-close path now
+  awaits worker cleanup acknowledgement before terminating the worklet.
+- `DataLoader` cleanup logic was scoped down to `packages/rag` so the SDK no longer
+  carries that surface.
+
 ## [0.9.1]
 
 📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.9.1
diff --git a/packages/sdk/NOTICE b/packages/sdk/NOTICE
index 7d2b63cb90..cc94473e7c 100644
--- a/packages/sdk/NOTICE
+++ b/packages/sdk/NOTICE
@@ -328,7 +328,7 @@ Third-Party Model Licenses
   model.enis.intgemm.alphas
     https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enis
   model.enit.intgemm.alphas
-    https://github.com/mozilla/firefox-translations-models/tree/main/models/tiny/enit
+    https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enit
   model.enja.intgemm.alphas
     https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enja
   model.enkn.intgemm.alphas
@@ -372,7 +372,7 @@ Third-Party Model Licenses
   model.enzh.intgemm.alphas
     https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh
   model.esen.intgemm.alphas
-    https://github.com/mozilla/firefox-translations-models/tree/main/models/tiny/esen
+    https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/esen
   model.eten.intgemm.alphas
     https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/eten
   model.faen.intgemm.alphas
@@ -489,19 +489,26 @@ JavaScript Dependencies
 
   @hyperswarm/secret-stream@6.9.1
     https://github.com/holepunchto/hyperswarm-secret-stream
-  @qvac/decoder-audio@0.3.8
+  @qvac/decoder-audio@0.3.6
+    https://github.com/tetherto/qvac
+  @qvac/decoder-audio@0.3.7
     https://github.com/tetherto/qvac
   @qvac/diagnostics@0.1.1
   @qvac/diffusion-cpp@0.3.0
     https://github.com/tetherto/qvac
+  @qvac/dl-base@0.1.1
+  @qvac/dl-hyperdrive@0.1.1
   @qvac/embed-llamacpp@0.14.0
     https://github.com/tetherto/qvac
   @qvac/error@0.1.1
+  @qvac/infer-base@0.1.1
+  @qvac/infer-base@0.4.0
+    https://github.com/tetherto/qvac
   @qvac/infer-base@0.4.1
     https://github.com/tetherto/qvac
   @qvac/langdetect-text@0.1.2
     https://github.com/tetherto/qvac
-  @qvac/llm-llamacpp@0.16.0
+  @qvac/llm-llamacpp@0.17.3
     https://github.com/tetherto/qvac
   @qvac/logging@0.1.0
   @qvac/ocr-onnx@0.4.2
@@ -514,9 +521,9 @@ JavaScript Dependencies
     https://github.com/tetherto/qvac
   @qvac/registry-schema@0.1.2
   @qvac/response@0.1.2
-  @qvac/transcription-parakeet@0.3.2
+  @qvac/transcription-parakeet@0.3.1
     https://github.com/tetherto/qvac
-  @qvac/transcription-whispercpp@0.6.4
+  @qvac/transcription-whispercpp@0.6.1
     https://github.com/tetherto/qvac
   @qvac/translation-nmtcpp@2.1.0
     https://github.com/tetherto/qvac
@@ -552,19 +559,19 @@ JavaScript Dependencies
     https://github.com/holepunchto/bare-events
   bare-events@2.8.2
     https://github.com/holepunchto/bare-events
-  bare-fetch@2.9.0
+  bare-fetch@2.8.1
     https://github.com/holepunchto/bare-fetch
   bare-ffmpeg@1.2.2
     https://github.com/holepunchto/bare-ffmpeg
   bare-form-data@1.2.1
     https://github.com/holepunchto/bare-form-data
-  bare-fs@4.7.1
+  bare-fs@4.6.0
     https://github.com/holepunchto/bare-fs
   bare-hrtime@2.1.1
     https://github.com/holepunchto/bare-hrtime
   bare-http-parser@1.1.3
     https://github.com/holepunchto/bare-http-parser
-  bare-http1@4.5.6
+  bare-http1@4.5.5
     https://github.com/holepunchto/bare-http1
   bare-https@2.1.3
     https://github.com/holepunchto/bare-https
@@ -574,9 +581,7 @@ JavaScript Dependencies
     https://github.com/holepunchto/bare-lief
   bare-link@3.2.1
     https://github.com/holepunchto/bare-link
-  bare-mime@1.0.0
-    https://github.com/holepunchto/bare-mime
-  bare-module@6.2.0
+  bare-module@6.1.3
     https://github.com/holepunchto/bare-module
   bare-module-lexer@1.4.7
     https://github.com/holepunchto/bare-module-lexer
@@ -590,7 +595,7 @@ JavaScript Dependencies
     https://github.com/holepunchto/bare-node
   bare-node-worker-threads@1.0.0
     https://github.com/holepunchto/bare-node
-  bare-os@3.9.0
+  bare-os@3.8.7
     https://github.com/holepunchto/bare-os
   bare-path@3.0.0
     https://github.com/holepunchto/bare-path
@@ -600,39 +605,39 @@ JavaScript Dependencies
     https://github.com/holepunchto/bare-process
   bare-rpc@1.2.0
     https://github.com/holepunchto/bare-rpc
-  bare-runtime@1.28.4
+  bare-runtime@1.28.1
     https://github.com/holepunchto/bare-runtime
-  bare-runtime-darwin-arm64@1.28.4
+  bare-runtime-darwin-arm64@1.28.1
     https://github.com/holepunchto/bare-runtime
-  bare-semver@1.0.3
+  bare-semver@1.0.2
     https://github.com/holepunchto/bare-semver
   bare-signals@4.2.0
     https://github.com/holepunchto/bare-signals
   bare-stdio@1.0.2
     https://github.com/holepunchto/bare-stdio
-  bare-stream@2.13.1
+  bare-stream@2.12.0
     https://github.com/holepunchto/bare-stream
-  bare-structured-clone@1.5.4
+  bare-structured-clone@1.5.3
     https://github.com/holepunchto/bare-structured-clone
   bare-subprocess@5.2.3
     https://github.com/holepunchto/bare-subprocess
-  bare-tcp@2.2.12
+  bare-tcp@2.2.7
     https://github.com/holepunchto/bare-tcp
   bare-thread@1.2.0
     https://github.com/holepunchto/bare-thread
-  bare-tls@2.2.3
+  bare-tls@2.2.1
     https://github.com/holepunchto/bare-tls
   bare-tty@5.1.0
     https://github.com/holepunchto/bare-tty
   bare-type@1.1.0
     https://github.com/holepunchto/bare-type
-  bare-url@2.4.2
+  bare-url@2.4.0
     https://github.com/holepunchto/bare-url
   bare-worker@4.1.6
     https://github.com/holepunchto/bare-worker
-  bare-zlib@1.3.3
+  bare-zlib@1.3.1
     https://github.com/holepunchto/bare-zlib
-  blind-relay@1.5.0
+  blind-relay@1.4.0
     https://github.com/holepunchto/blind-relay
   compact-encoding@2.19.2
     https://github.com/holepunchto/compact-encoding
@@ -642,7 +647,7 @@ JavaScript Dependencies
     https://github.com/holepunchto/events-universal
   fd-lock@2.1.1
     https://github.com/holepunchto/fd-lock
-  fs-native-extensions@1.5.0
+  fs-native-extensions@1.4.5
     https://github.com/holepunchto/fs-native-extensions
   hyperblobs@2.11.1
     https://github.com/holepunchto/hyperblobs
@@ -656,8 +661,6 @@ JavaScript Dependencies
     https://github.com/holepunchto/hypercore-storage
   hyperdb@4.22.3
     https://github.com/holepunchto/hyperdb
-  hyperdht-address@1.0.1
-    https://github.com/holepunchto/hyperdht-address
   hyperdht-stats@1.10.0
     https://github.com/holepunchto/hyperdht-stats
   hyperdispatch@1.5.1
@@ -670,7 +673,7 @@ JavaScript Dependencies
     https://github.com/holepunchto/hyperswarm-stats
   index-encoder@3.5.0
     https://github.com/holepunchto/index-encoder
-  mirror-drive@1.14.2
+  mirror-drive@1.14.1
     https://github.com/holepunchto/mirror-drive
   noise-handshake@4.2.0
     https://github.com/holepunchto/noise-handshake
@@ -742,8 +745,8 @@ JavaScript Dependencies
     https://github.com/holepunchto/corestore
   debounceify@1.1.0
     https://github.com/mafintosh/debounceify
-  dht-rpc@6.26.4
-    https://github.com/holepunchto/dht-rpc
+  dht-rpc@6.26.3
+    https://github.com/mafintosh/dht-rpc
   events@3.3.0
     https://github.com/Gozala/events
   fast-fifo@1.3.2
@@ -762,7 +765,7 @@ JavaScript Dependencies
     https://github.com/holepunchto/hypercore
   hypercore-crypto@3.6.1
     https://github.com/mafintosh/hypercore-crypto
-  hyperdht@6.30.0
+  hyperdht@6.29.6
     https://github.com/holepunchto/hyperdht
   hyperswarm@4.17.0
     https://github.com/holepunchto/hyperswarm
@@ -780,8 +783,8 @@ JavaScript Dependencies
     https://github.com/mafintosh/nat-sampler
   protocol-buffers-encodings@1.2.0
     https://github.com/mafintosh/protocol-buffers-encodings
-  protomux@3.10.3
-    https://github.com/holepunchto/protomux
+  protomux@3.10.1
+    https://github.com/mafintosh/protomux
   queue-tick@1.0.1
     https://github.com/mafintosh/queue-tick
   random-array-iterator@1.0.0
@@ -816,6 +819,8 @@ JavaScript Dependencies
     https://github.com/mafintosh/tar-stream
   teex@1.0.1
     https://github.com/mafintosh/teex
+  test-tmp@1.4.0
+    https://github.com/mafintosh/test-tmp
   time-ordered-set@2.0.1
     https://github.com/mafintosh/time-ordered-set
   timeout-refresh@2.0.1
diff --git a/packages/sdk/changelog/0.10.0/CHANGELOG.md b/packages/sdk/changelog/0.10.0/CHANGELOG.md
new file mode 100644
index 0000000000..6b2304420e
--- /dev/null
+++ b/packages/sdk/changelog/0.10.0/CHANGELOG.md
@@ -0,0 +1,63 @@
+# Changelog v0.10.0
+
+Release Date: 2026-05-01
+
+## ✨ Features
+
+- Add real-time voice assistant example. (see PR [#1631](https://github.com/tetherto/qvac/pull/1631))
+- Add parallel orchestration, download dedupe, and generic companion-set support. (see PR [#1636](https://github.com/tetherto/qvac/pull/1636)) - See [breaking changes](./breaking.md)
+- Unified CompletionEvent stream as canonical completion API. (see PR [#1673](https://github.com/tetherto/qvac/pull/1673)) - See [breaking changes](./breaking.md)
+- Add Bergamot NMT companion-set grouping and path-based vocab resolution. (see PR [#1707](https://github.com/tetherto/qvac/pull/1707))
+- Switch delegation to direct DHT connect, drop topic end-to-end. (see PR [#1729](https://github.com/tetherto/qvac/pull/1729)) - See [breaking changes](./breaking.md)
+
+## 🔌 API
+
+- Update SDK nmtcpp plugin for @qvac/translation-nmtcpp 2.0.1. (see PR [#1563](https://github.com/tetherto/qvac/pull/1563)) - See [API changes](./api.md)
+- Add sentence-level streaming for onnx text-to-speech. (see PR [#1590](https://github.com/tetherto/qvac/pull/1590)) - See [API changes](./api.md)
+- Support the new llm addon cache api in sdk. (see PR [#1633](https://github.com/tetherto/qvac/pull/1633)) - See [API changes](./api.md)
+- Add img2img support to SDK diffusion API. (see PR [#1662](https://github.com/tetherto/qvac/pull/1662)) - See [API changes](./api.md)
+- Harden suspend with lifecycle gate and add state() api. (see PR [#1691](https://github.com/tetherto/qvac/pull/1691)) - See [API changes](./api.md)
+- Propagate whisper per-segment metadata to SDK users. (see PR [#1701](https://github.com/tetherto/qvac/pull/1701)) - See [API changes](./api.md)
+- Make auto KV-cache reuse completed turn history. (see PR [#1705](https://github.com/tetherto/qvac/pull/1705)) - See [API changes](./api.md)
+- Propagate registry download retries and expose stream timeout. (see PR [#1743](https://github.com/tetherto/qvac/pull/1743)) - See [API changes](./api.md)
+- Improve model type & capability system. (see PR [#1748](https://github.com/tetherto/qvac/pull/1748)) - See [breaking changes](./breaking.md), [API changes](./api.md)
+- Add responseFormat for structured output. (see PR [#1768](https://github.com/tetherto/qvac/pull/1768)) - See [API changes](./api.md)
+- SDK "dynamic" tools mode. (see PR [#1779](https://github.com/tetherto/qvac/pull/1779)) - See [API changes](./api.md)
+- Pre-terminate cleanup hook + stabilise mobile smoke. (see PR [#1797](https://github.com/tetherto/qvac/pull/1797)) - See [API changes](./api.md)
+- Add native tool-call dialect routing (hermes, pythonic, json) with override. (see PR [#1802](https://github.com/tetherto/qvac/pull/1802)) - See [API changes](./api.md)
+
+## 🐞 Fixes
+
+- Add timeout to RPC initialization in Node runtime. (see PR [#1550](https://github.com/tetherto/qvac/pull/1550))
+- Enable corestoreOpts: { wait: true } for registry client. (see PR [#1699](https://github.com/tetherto/qvac/pull/1699))
+- Skip kv-cache savedCount on cancelled or zero-token turns. (see PR [#1737](https://github.com/tetherto/qvac/pull/1737))
+- Scope kv-cache invalidation to deleted key on RPC delete-cache. (see PR [#1740](https://github.com/tetherto/qvac/pull/1740))
+- Strip __profiling envelope in delegate transport before zod validation. (see PR [#1767](https://github.com/tetherto/qvac/pull/1767))
+- Replace z.xor with z.union, bump zod floor to ^4.3.0. (see PR [#1790](https://github.com/tetherto/qvac/pull/1790))
+- Deterministic decoding for LLM translate. (see PR [#1808](https://github.com/tetherto/qvac/pull/1808))
+- Handle inflight delegation rejection cleanup chain. (see PR [#1811](https://github.com/tetherto/qvac/pull/1811))
+
+## 📦 Models
+
+- Regenerate model registry with companion-set metadata. (see PR [#1700](https://github.com/tetherto/qvac/pull/1700)) - See [model changes](./models.md)
+  Added: NMT_Q0F16 through NMT_Q0F16_9, NMT_Q4_0 through NMT_Q4_0_12+
+  Removed: MARIAN_OPUS_*
+
+## 📘 Docs
+
+- Content update - SDK - diffusion - add img2img gen. (see PR [#1796](https://github.com/tetherto/qvac/pull/1796))
+
+## 🧪 Tests
+
+- Fix android sharded-model-resume scudo oom. (see PR [#1831](https://github.com/tetherto/qvac/pull/1831))
+
+## 🧹 Chores
+
+- Migrate SDK plugins to new addon constructor shape. (see PR [#1688](https://github.com/tetherto/qvac/pull/1688)) - See [breaking changes](./breaking.md)
+- Refresh tests-qvac docs, tooling, and workflow job names. (see PR [#1712](https://github.com/tetherto/qvac/pull/1712))
+- Scope down DataLoader cleanup to packages/rag. (see PR [#1754](https://github.com/tetherto/qvac/pull/1754))
+
+## ⚙️ Infrastructure
+
+- Add suite filtering and PR-triggered e2e test workflows for SDK. (see PR [#1653](https://github.com/tetherto/qvac/pull/1653))
+
diff --git a/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md b/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md
new file mode 100644
index 0000000000..6dcc30a493
--- /dev/null
+++ b/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md
@@ -0,0 +1,443 @@
+# QVAC SDK v0.10.0 Release Notes
+
+📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.10.0
+
+This release lands a redesigned completion API built on a unified event stream, a generic
+companion-set system that handles multi-file models in parallel, and a much stronger model
+type/capability system that catches mis-routed calls at compile time. It also rewires
+delegated inference to direct DHT connections, expands the addon surface (img2img,
+structured output, dynamic tools, tool dialects, per-segment whisper metadata,
+sentence-streaming TTS), and reshapes the model registry around companion sets.
+
+## Breaking Changes
+
+### Unified `CompletionEvent` stream
+
+`completion()` now returns a `CompletionRun` with a single canonical `events` stream that
+carries content, thinking, tool calls, stats, and completion in one ordered
+sequence. The legacy `tokenStream`/`stats` fields still work as derived views, but the
+event stream is the authoritative API going forward and is what enables features like
+captured thinking and structured tool framing.
+
+**Before:**
+
+```typescript
+const result = completion({ modelId, history, stream: true });
+for await (const token of result.tokenStream) { /* ... */ }
+const stats = await result.stats;
+```
+
+**After:**
+
+```typescript
+const run = completion({ modelId, history, stream: true, captureThinking: true });
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+  if (event.type === "toolCall") console.log(event.call.name);
+}
+const result = await run.final;
+// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText
+```
+
+### Model type & capability system overhaul
+
+`LoadModelOptions` is no longer a single catch-all. Custom plugins must use the new
+`LoadCustomPluginModelOptions<"plugin-name">` generic so the literal plugin string is
+pinned at the type level. Built-in model types continue to pick the right overload
+automatically when the annotation is dropped.
+
+At runtime, built-in SDK operations now throw `MODEL_OPERATION_NOT_SUPPORTED` when called
+against the wrong model type — with a message that lists the requested operation, the
+loaded model's type, and the supported operations on it. The lower-level `pluginInvoke`
+and `pluginInvokeStream` paths still surface `PLUGIN_HANDLER_NOT_FOUND` as before.
+
+`translate(...)` now routes by the loaded model's registered type. Passing a mismatched
+`modelType` throws `ModelTypeMismatchError` instead of silently mis-routing the call.
+
+**Before:**
+
+```typescript
+import type { LoadModelOptions } from "@qvac/sdk";
+
+const opts: LoadModelOptions = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+```
+
+**After:**
+
+```typescript
+import type { LoadCustomPluginModelOptions } from "@qvac/sdk";
+
+const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+// Or just drop the annotation — TS picks the right overload.
+```
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) {
+    // Includes requested operation, loaded model type, supported operations,
+    // and suggested model types.
+  }
+}
+```
+
+### Companion-set download progress field
+
+Multi-file model downloads (ONNX, future formats) now report progress through a generic
+`fileSetInfo` field instead of the ONNX-specific `onnxInfo`. The shape is identical, only
+the field name changed.
+
+**Before:**
+
+```typescript
+onProgress: (progress) => {
+  if (progress.onnxInfo) {
+    console.log(`[${progress.onnxInfo.currentFile}] ${progress.onnxInfo.overallPercentage.toFixed(1)}%`);
+  }
+}
+```
+
+**After:**
+
+```typescript
+onProgress: (progress) => {
+  if (progress.fileSetInfo) {
+    console.log(`[${progress.fileSetInfo.currentFile}] ${progress.fileSetInfo.overallPercentage.toFixed(1)}%`);
+  }
+}
+```
+
+### Delegated inference uses direct DHT connect
+
+Delegation no longer uses a shared topic for rendezvous. Consumers connect directly to a
+provider's public key via `swarm.dht.connect(publicKey)`, and providers bind the DHT
+server with `swarm.listen()` instead of announcing a topic. This removes a class of
+discovery-flake failures and shortens connect time. Callers using the high-level
+delegation API see no surface change; integrators driving Hyperswarm directly should
+update their join/listen logic.
+
+### Plugin constructor migration
+
+SDK plugins (`definePlugin`) now use the new addon constructor shape. Plugin authors
+need to migrate their `createModel` implementation to match — the SDK in this release
+ships with all first-party plugins already migrated.
+
+## New APIs and Capabilities
+
+### `getLoadedModelInfo` for runtime introspection
+
+A new `getLoadedModelInfo` API returns metadata for a loaded `modelId`, discriminated on
+`isDelegated`. Local models expose their authoritative handler list and `modelType`;
+delegated models defer to the provider. Useful for preflighting a built-in SDK call
+before issuing the RPC.
+
+```typescript
+import { getLoadedModelInfo, transcribe } from "@qvac/sdk";
+
+const info = await getLoadedModelInfo({ modelId });
+
+if (info.isDelegated || info.handlers.includes("transcribeStream")) {
+  await transcribe({ modelId /* ... */ });
+}
+```
+
+### Structured output (`responseFormat`)
+
+`completion()` now accepts a `responseFormat` option that constrains the model to emit
+schema-valid JSON. The output is guaranteed to validate against the supplied JSON Schema.
+
+```typescript
+const run = completion({
+  modelId,
+  history: [{ role: "user", content: "Extract: I'm Alice, 30, data engineer." }],
+  stream: true,
+  responseFormat: {
+    type: "json_schema",
+    json_schema: {
+      name: "Person",
+      schema: {
+        type: "object",
+        properties: {
+          name: { type: "string" },
+          age: { type: "integer" },
+          occupation: { type: "string" },
+        },
+        required: ["name", "age", "occupation"],
+        additionalProperties: false,
+      },
+    },
+  },
+});
+
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+}
+const final = await run.final;
+JSON.parse(final.contentText); // schema-valid
+```
+
+### Dynamic tools mode
+
+LLM models can now opt into a `dynamic` tools mode at load time. Subsequent
+`completion()` calls can pass an entirely different `tools` array on each turn, and the
+addon trims the previous tool block from the KV cache so rotation is free — no need to
+invalidate the cache or pin the tool set per-session.
+
+```typescript
+import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk";
+
+const modelId = await loadModel({
+  modelSrc: QWEN3_1_7B_INST_Q4,
+  modelType: "llm",
+  modelConfig: {
+    ctx_size: 4096,
+    tools: true,
+    toolsMode: TOOLS_MODE.dynamic,
+  },
+});
+
+// Turn 1 — weather tools.
+const turn1 = completion({
+  modelId, history, kvCache, stream: true,
+  tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }],
+});
+
+// Turn 2 — same kvCache, different tools. Free rotation.
+const turn2 = completion({
+  modelId, history, kvCache, stream: true,
+  tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }],
+});
+```
+
+### Tool-call dialect routing
+
+Tool-call parsing is now dialect-aware. The SDK auto-detects between `hermes`,
+`pythonic`, and `json` framings, and a new `toolDialect` parameter lets you force a
+specific parser when auto-detection picks the wrong path — common for Llama 3.x
+fine-tunes that emit native pythonic headers, because the auto-router defaults those
+models to `hermes` for empirical reasons.
+
+```typescript
+import { completion, type ToolDialect } from "@qvac/sdk";
+
+const result = completion({
+  modelId, history, tools, stream: true,
+  toolDialect: "pythonic", // "hermes" | "pythonic" | "json"
+});
+```
+
+### img2img for diffusion models
+
+The diffusion API now accepts an `init_image` for SDEdit-style image-to-image on
+SD/SDXL, and in-context conditioning on FLUX.2. `strength` controls how much of the
+source is preserved on SD/SDXL; FLUX.2 ignores it (the path is purely conditional).
+
+```typescript
+const initImage = new Uint8Array(fs.readFileSync("input.png"));
+const { outputs } = diffusion({
+  modelId,
+  prompt: "oil painting style, vibrant colors",
+  init_image: initImage,
+  strength: 0.5, // 0 = keep source, 1 = ignore source
+});
+```
+
+### Sentence-level TTS streaming
+
+ONNX text-to-speech can now stream output one sentence at a time, either as a
+self-contained `textToSpeech({ stream: true, sentenceStream: true })` call or via a
+duplex `textToSpeechStream` session that you can pipe a streaming LLM into. Each chunk
+exposes the int16 PCM samples plus the source sentence and chunk index.
+
+```typescript
+const session = await textToSpeechStream({
+  modelId: ttsModelId,
+  inputType: "text",
+  accumulateSentences: true,
+  sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual"
+  flushAfterMs: 400,
+});
+
+(async () => {
+  for await (const delta of completion({ modelId: llmModelId /* ... */ }).tokenStream) {
+    session.write(delta);
+  }
+  session.end();
+})();
+
+for await (const chunk of session) {
+  // chunk.buffer / chunk.chunkIndex / chunk.sentenceChunk
+  if (chunk.done) break;
+}
+```
+
+### Per-segment whisper metadata
+
+Both `transcribe` (batch) and `transcribeStream` (duplex) now return structured
+`TranscribeSegment` objects with start/end timestamps, segment IDs, and an `append`
+flag — enabling proper subtitle generation and timeline alignment instead of raw text
+concatenation.
+
+```typescript
+const segments = await transcribe({ modelId, audioChunk: audioFilePath, metadata: true });
+for (const s of segments) {
+  console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`);
+}
+```
+
+### Suspend lifecycle gate and `state()`
+
+`suspend()` is now serialized through a lifecycle gate that prevents overlapping
+suspend/resume races. A new `state()` API reports the current lifecycle phase:
+`active`, `suspending`, `suspended`, or `resuming`.
+
+```typescript
+import { state, suspend, resume, type LifecycleState } from "@qvac/sdk";
+
+await suspend();
+const current: LifecycleState = await state();
+if (current !== "active") {
+  await resume();
+}
+```
+
+### Registry download retries and configurable stream timeout
+
+Two new SDK config knobs cover slow/unstable links: `registryDownloadMaxRetries` caps how
+many times `REQUEST_TIMEOUT` failures are retried (set to `0` to disable), and
+`registryStreamTimeoutMs` extends the per-block stream timeout beyond the default 60s.
+
+```typescript
+import { setSDKConfig } from "@qvac/sdk";
+
+setSDKConfig({
+  registryDownloadMaxRetries: 5,
+  registryStreamTimeoutMs: 180_000,
+});
+```
+
+### Auto KV-cache: replay the canonical assistant turn
+
+When auto KV-cache is enabled, the completion result now exposes
+`final.cacheableAssistantContent` — the exact assistant string the SDK persisted to the
+cache key on this turn. Push it back into `history` verbatim on the next turn to
+guarantee a cache hit. Tool-call turns aren't auto-cached today and omit the field;
+fall back to `final.contentText` in that case.
+
+```typescript
+const run = completion({ modelId, history, kvCache: true });
+for await (const _ of run.tokenStream) { /* stream */ }
+const final = await run.final;
+const nextHistory = [
+  ...history,
+  { role: "assistant", content: final.cacheableAssistantContent ?? final.contentText },
+  { role: "user", content: "follow-up question" },
+];
+```
+
+### LLM-addon cache API plumbed through SDK
+
+The SDK now wires through the LLM addon's first-class cache API — including explicit
+`deleteCache({ kvCacheKey })` for evicting a named cache key — so consumers can manage
+KV-cache lifetimes alongside `loadModel`/`unloadModel`.
+
+### NMTcpp 2.0.1 surface
+
+The SDK NMT plugin now targets `@qvac/translation-nmtcpp 2.0.1` with a structured
+constructor that distinguishes primary and pivot model files, vocab files, and pivot
+config (beam size, top-k). Bergamot models are also picked up via path-based vocab
+resolution and grouped into companion sets, which lets the cache and download paths
+treat them like any other multi-file model.
+
+## Features
+
+### Parallel orchestration and download dedupe
+
+Model loading is now genuinely parallel where it can be: the primary model and any
+companion files (vision projection, vocab, etc.) download concurrently, and concurrent
+requests for the same asset are deduplicated to a single transfer. Cancellation cleans
+up all active transfers atomically with no leaked state. Profiling fields
+(`sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`,
+`modelInitializationTime`, `checksumValidationTime`) are populated correctly across
+both primary and companion downloads, with aggregate stats merged at the run level.
+
+The companion pipeline is also generic: `companions.ts` is the only format-aware piece,
+and adding a new multi-file format is a matter of dropping in a detection function and
+registering it with `groupCompanionSets`. Everything downstream — codegen, resolver,
+cache probing, storage cleanup — handles it automatically.
+
+### Real-time voice assistant example
+
+A new end-to-end example demonstrates a real-time voice assistant pipeline (whisper →
+LLM → TTS) wired together using the SDK's streaming primitives.
+
+## Bug Fixes
+
+- RPC initialization in the Node runtime now has an explicit timeout, so a wedged
+  transport can no longer hang `loadModel`/`unloadModel` indefinitely.
+- The registry client now opens its corestore with `wait: true`, eliminating a startup
+  race where downloads could begin before replication was ready.
+- KV-cache `savedCount` is no longer incremented on cancelled or zero-token turns,
+  preventing inflated cache stats.
+- `delete-cache` RPC now scopes invalidation to the deleted key only instead of wiping
+  unrelated entries.
+- Delegated transports strip the `__profiling` envelope before zod validation, fixing a
+  spurious validation error when profiling is enabled on the consumer side.
+- Replaced `z.xor` with `z.union` and bumped the zod floor to `^4.3.0` to track upstream
+  breaking changes.
+- LLM-based translation now uses deterministic decoding so the same input produces the
+  same output across runs.
+- Inflight delegation requests that get rejected now run their cleanup chain to
+  completion instead of leaking pending promises.
+
+## Model Registry Changes
+
+The model registry was regenerated around companion-set metadata. The user-facing surface
+is leaner: families that used to live as separate `*_DATA`, `*_LEX`, `*_VOCAB`, and
+`*_METADATA_*` constants are now companion-only — they're still downloaded, but they're
+not addressable as standalone model sources. Marian Opus models were renamed under the
+`NMT_*` namespace to match the rest of the NMT family.
+
+### Added
+
+```
+NMT_Q0F16 through NMT_Q0F16_9 (10 entries)
+NMT_Q4_0 through NMT_Q4_0_12+ (22 entries)
+```
+
+### Removed (now companion-only or renamed)
+
+```
+*_DATA (32 entries — companion-only, e.g. PARAKEET_TDT_ENCODER_DATA_FP32, TTS_*_DATA)
+BERGAMOT_*_LEX (93 entries — companion-only)
+BERGAMOT_*_VOCAB (93 entries — companion-only)
+BERGAMOT_METADATA_* (87 entries — companion-only)
+MARIAN_OPUS_* (32 entries — renamed to NMT_*)
+```
+
+## Documentation, Tests, and Infrastructure
+
+- Diffusion documentation was extended to cover the new img2img flows (SDEdit on
+  SD/SDXL, in-context conditioning on FLUX.2).
+- Android sharded-model-resume tests no longer trip Scudo OOM — the test harness now
+  bounds memory more conservatively on long-running resume scenarios.
+- The tests-qvac docs, tooling, and CI workflow job names were refreshed for the new
+  suite filtering and PR-triggered e2e workflows. Suite filtering plus PR-trigger labels
+  let CI run targeted SDK e2e subsets on demand instead of always running the full grid.
+- A pre-terminate cleanup hook stabilises the mobile smoke tests: the mobile auto-close
+  path now awaits worker cleanup acknowledgement before terminating the worklet.
+- `DataLoader` cleanup logic was scoped down to `packages/rag` so the SDK no longer
+  carries that surface.
diff --git a/packages/sdk/changelog/0.10.0/api.md b/packages/sdk/changelog/0.10.0/api.md
new file mode 100644
index 0000000000..69812b7c61
--- /dev/null
+++ b/packages/sdk/changelog/0.10.0/api.md
@@ -0,0 +1,501 @@
+# 🔌 API Changes v0.10.0
+
+## Update SDK nmtcpp plugin for @qvac/translation-nmtcpp 2.0.1
+
+PR: [#1563](https://github.com/tetherto/qvac/pull/1563)
+
+```typescript
+// NMT addon constructor (2.0.1) — called by SDK plugin
+new TranslationNmtcpp({
+  files: {
+    model: '/path/to/model.bin',
+    srcVocab: '/path/to/vocab.spm',
+    dstVocab: '/path/to/vocab.spm',
+    pivotModel: '/path/to/pivot.bin',       // optional
+    pivotSrcVocab: '/path/to/pivot-vocab.spm', // optional
+    pivotDstVocab: '/path/to/pivot-vocab.spm', // optional
+  },
+  params: { srcLang: 'en', dstLang: 'fr' },
+  config: {
+    modelType: TranslationNmtcpp.ModelTypes.Bergamot,
+    beamsize: 4,
+    pivotConfig: { beamsize: 4, topk: 100 }, // optional
+  },
+  logger,
+  opts: { stats: true },
+})
+```
+
+---
+
+## Add sentence-level streaming for onnx text-to-speech
+
+PR: [#1590](https://github.com/tetherto/qvac/pull/1590)
+
+```typescript
+import { loadModel, textToSpeech, unloadModel } from "@qvac/sdk";
+
+const modelId = await loadModel({ /* ...Supertonic ONNX TTS config... */ });
+
+const result = textToSpeech({
+  modelId,
+  text: "Your long passage here.",
+  inputType: "text",
+  stream: true,
+  sentenceStream: true,
+  sentenceStreamLocale: "en",
+});
+
+for await (const chunk of result.chunkUpdates!) {
+  // chunk.buffer      -> int16 PCM samples for this sentence
+  // chunk.chunkIndex  -> 0-based sentence index
+  // chunk.sentenceChunk -> source text for this chunk
+}
+
+await result.done;
+await unloadModel({ modelId });
+```
+
+```typescript
+import { completion, textToSpeechStream } from "@qvac/sdk";
+
+const session = await textToSpeechStream({
+  modelId: ttsModelId,
+  inputType: "text",
+  accumulateSentences: true,
+  sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual"
+  flushAfterMs: 400,
+});
+
+(async () => {
+  for await (const delta of completion({ modelId: llmModelId, /* ... */ }).tokenStream) {
+    session.write(delta);
+  }
+  session.end();
+})();
+
+for await (const chunk of session) {
+  // chunk.buffer       -> int16 PCM for this sentence / flush window
+  // chunk.chunkIndex   -> optional sentence index
+  // chunk.sentenceChunk-> optional source text
+  if (chunk.done) break;
+}
+```
+
+---
+
+## Support the new llm addon cache api in sdk
+
+PR: [#1633](https://github.com/tetherto/qvac/pull/1633)
+
+```ts
+import {
+  completion,
+  deleteCache,
+  LLAMA_3_2_1B_INST_Q4_0,
+  loadModel,
+  unloadModel,
+  VERBOSITY,
+} from "@qvac/sdk";
+
+type ChatMessage = {
+  role: string;
+  content: string;
+};
+
+const cacheKey = "trip-planner";
+
+const modelId = await loadModel({
+  modelSrc: LLAMA_3_2_1B_INST_Q4_0,
+  modelType: "llm",
+  modelConfig: {
+    ctx_size: 4096,
+    verbosity: VERBOSITY.ERROR,
+  },
+});
+
+async function run(history: ChatMessage[]) {
+  const result = completion({
+    modelId,
+    history,
+    stream: true,
+    kvCache: cacheKey,
+  });
+
+  let text = "";
+  for await (const token of result.tokenStream) {
+    text += token;
+  }
+
+  return text.trim();
+}
+
+const firstReply = await run([
+  { role: "system", content: "You are a concise travel assistant." },
+  { role: "user", content: "I like museums and seafood. Plan a day in Lisbon." },
+]);
+
+const followUpReply = await run([
+  { role: "system", content: "You are a concise travel assistant." },
+  { role: "user", content: "I like museums and seafood. Plan a day in Lisbon." },
+  { role: "assistant", content: firstReply },
+  { role: "user", content: "Now make it a rainy-day itinerary." },
+]);
+
+console.log(followUpReply);
+
+await deleteCache({ kvCacheKey: cacheKey });
+await unloadModel({ modelId, clearStorage: false });
+```
+
+---
+
+## Add img2img support to SDK diffusion API
+
+PR: [#1662](https://github.com/tetherto/qvac/pull/1662)
+
+```typescript
+import { loadModel, diffusion, SD_V2_1_1B_Q8_0 } from "@qvac/sdk";
+import fs from "fs";
+
+const modelId = await loadModel({ modelSrc: SD_V2_1_1B_Q8_0, modelType: "diffusion" });
+
+// SD / SDXL — SDEdit
+const initImage = new Uint8Array(fs.readFileSync("input.png"));
+const { outputs } = diffusion({
+  modelId,
+  prompt: "oil painting style, vibrant colors",
+  init_image: initImage,
+  strength: 0.5, // 0 = keep source, 1 = ignore source
+});
+
+// FLUX.2 — in-context conditioning
+// NOTE: requires `prediction: "flux2_flow"` set on the model config at loadModel time.
+// `strength` is ignored on this path.
+const { outputs: fluxOutputs } = diffusion({
+  modelId,
+  prompt: "turn into watercolor",
+  init_image: initImage,
+});
+
+const buffers = await outputs;
+fs.writeFileSync("out.png", buffers[0]!);
+```
+
+---
+
+## Harden suspend with lifecycle gate and add state() api
+
+PR: [#1691](https://github.com/tetherto/qvac/pull/1691)
+
+```typescript
+import { state, suspend, resume, type LifecycleState } from "@qvac/sdk";
+
+await suspend();
+
+const current: LifecycleState = await state();
+// "active" | "suspending" | "suspended" | "resuming"
+
+if (current !== "active") {
+  await resume();
+}
+```
+
+---
+
+## Propagate whisper per-segment metadata to SDK users
+
+PR: [#1701](https://github.com/tetherto/qvac/pull/1701)
+
+```typescript
+// Batch — returns TranscribeSegment[] instead of string
+const segments = await transcribe({
+  modelId,
+  audioChunk: audioFilePath,
+  metadata: true,
+});
+for (const s of segments) {
+  console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`);
+}
+
+// Duplex streaming — session iterator yields TranscribeSegment
+const session = await transcribeStream({ modelId, metadata: true });
+session.write(audioChunk);
+for await (const segment of session) {
+  console.log(segment.startMs, segment.endMs, segment.text);
+}
+session.end();
+```
+
+```typescript
+type TranscribeSegment = {
+  text: string;
+  startMs: number;
+  endMs: number;
+  append: boolean;
+  id: number;
+};
+```
+
+---
+
+## Make auto KV-cache reuse completed turn history
+
+PR: [#1705](https://github.com/tetherto/qvac/pull/1705)
+
+```typescript
+// New: `final.cacheableAssistantContent` — the canonical assistant
+// string the SDK persisted to the auto-cache key on this turn.
+// Push it back into `history` verbatim to guarantee a next-turn hit.
+const run = completion({ modelId, history, kvCache: true });
+for await (const _ of run.tokenStream) { /* stream */ }
+const final = await run.final;
+const nextHistory = [
+  ...history,
+  {
+    role: "assistant",
+    // Falls back to contentText for tool-call turns, which can't
+    // be auto-cached today and therefore omit the field.
+    content: final.cacheableAssistantContent ?? final.contentText,
+  },
+  { role: "user", content: "follow-up question" },
+];
+```
+
+---
+
+## Propagate registry download retries and expose stream timeout
+
+PR: [#1743](https://github.com/tetherto/qvac/pull/1743)
+
+```ts
+import { setSDKConfig } from "@qvac/sdk";
+
+setSDKConfig({
+  // Retry REQUEST_TIMEOUT failures up to N times before giving up.
+  // Set to 0 to disable retries entirely.
+  registryDownloadMaxRetries: 5,
+
+  // Raise the per-block stream timeout for slow/high-latency links
+  // (default: 60_000 ms).
+  registryStreamTimeoutMs: 180_000,
+});
+```
+
+---
+
+## Improve model type & capability system
+
+PR: [#1748](https://github.com/tetherto/qvac/pull/1748)
+
+```typescript
+import type { LoadModelOptions } from "@qvac/sdk";
+
+const opts: LoadModelOptions = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+```
+
+```typescript
+import type { LoadCustomPluginModelOptions } from "@qvac/sdk";
+
+// Generic must be supplied; it pins the literal plugin string.
+const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+
+// Or just drop the annotation — TS picks the right overload.
+```
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.PLUGIN_HANDLER_NOT_FOUND) {
+    /* ... */
+  }
+}
+```
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) {
+    // Message includes the requested operation, the loaded model type,
+    // supported operations on the loaded model, and suggested model types.
+  }
+}
+```
+
+```typescript
+// Loaded an NMT model, but called translate with modelType: "llm".
+// Worker routed to the NMT plugin (modelId-based) but treated the input as LLM-style. Confusing failures.
+await translate({ modelId: nmtModelId, modelType: "llm", text: "..." });
+```
+
+```typescript
+// Drop modelType; the registered type drives behavior.
+await translate({ modelId: nmtModelId, text: "..." });
+
+// Or keep it — but it must match the loaded type, otherwise:
+//   ModelTypeMismatchError: expected "nmtcpp-translation", got "llamacpp-completion"
+await translate({ modelId: nmtModelId, modelType: "nmt", text: "..." });
+```
+
+```typescript
+import { getLoadedModelInfo, transcribe } from "@qvac/sdk";
+
+// Introspect a loaded modelId (local or delegated). Discriminated on `isDelegated`.
+const info = await getLoadedModelInfo({ modelId });
+
+// Preflight a built-in SDK call before sending the RPC.
+// Local: handlers + modelType are authoritative.
+// Delegated: handlers is [] and preflight defers to the provider.
+if (info.isDelegated || info.handlers.includes("transcribeStream")) {
+  await transcribe({ modelId /* ... */ });
+}
+
+if (!info.isDelegated) {
+  // info.modelType, info.loadedAt
+  // info.displayName?, info.addonPackage?  (from the plugin)
+  // info.name?, info.path?                 (from the model file)
+}
+
+// Throws ModelNotFoundError if modelId isn't loaded.
+```
+
+---
+
+## Add responseFormat for structured output
+
+PR: [#1768](https://github.com/tetherto/qvac/pull/1768)
+
+```typescript
+import { completion } from "@qvac/sdk";
+
+const run = completion({
+  modelId,
+  history: [{ role: "user", content: "Extract person info: I'm Alice, 30, data engineer." }],
+  stream: true,
+  responseFormat: {
+    type: "json_schema",
+    json_schema: {
+      name: "Person",
+      schema: {
+        type: "object",
+        properties: {
+          name: { type: "string" },
+          age: { type: "integer" },
+          occupation: { type: "string" },
+        },
+        required: ["name", "age", "occupation"],
+        additionalProperties: false,
+      },
+    },
+  },
+});
+
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+}
+const final = await run.final;
+JSON.parse(final.contentText); // guaranteed schema-valid
+```
+
+---
+
+## Sdk "dynamic" tools mode
+
+PR: [#1779](https://github.com/tetherto/qvac/pull/1779)
+
+```typescript
+import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk";
+
+// Opt into dynamic tools by setting `toolsMode` on the model config.
+const modelId = await loadModel({
+  modelSrc: QWEN3_1_7B_INST_Q4,
+  modelType: "llm",
+  modelConfig: {
+    ctx_size: 4096,
+    tools: true,
+    toolsMode: TOOLS_MODE.dynamic, // or the literal string "dynamic"
+  },
+});
+
+const kvCache = `dynamic-tools-${Date.now()}`;
+const history = [
+  { role: "system", content: "You are a helpful assistant." },
+  { role: "user", content: "What's the weather in Tokyo?" },
+];
+
+// Turn 1 — weather tools available.
+const turn1 = completion({
+  modelId,
+  history,
+  kvCache,
+  stream: true,
+  tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }],
+});
+
+// Turn 2 — same kvCache, completely different tools. The addon trims the
+// previous tool block from the cache, so this rotation is free.
+history.push({ role: "user", content: "Now check my horoscope for Aquarius." });
+const turn2 = completion({
+  modelId,
+  history,
+  kvCache,
+  stream: true,
+  tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }],
+});
+```
+
+---
+
+## Pre-terminate cleanup hook + stabilise mobile smoke
+
+PR: [#1797](https://github.com/tetherto/qvac/pull/1797)
+
+```typescript
+// Mobile auto-close path (unchanged from caller perspective):
+await close(); // now awaits worker cleanup ack before terminating worklet
+```
+
+---
+
+## Add native tool-call dialect routing (hermes, pythonic, json) with override
+
+PR: [#1802](https://github.com/tetherto/qvac/pull/1802)
+
+```typescript
+// New optional `toolDialect` parameter on completion() — force a specific
+// parser chain when the SDK can't auto-detect from the model name.
+import { completion } from "@qvac/sdk";
+
+const result = completion({
+  modelId,
+  history,
+  tools,
+  stream: true,
+  toolDialect: "pythonic", // "hermes" | "pythonic" | "json"
+});
+```
+
+Common override case: Llama 3.x tool-calling fine-tunes that emit the native pythonic header (`<|start_header_id|>tool_call<|end_header_id|>...<|eot_id|>`). Auto-routing keeps these on `hermes` because most observed Llama 3.x tool-calling tunes empirically emit JSON, not pythonic — pass `toolDialect: "pythonic"` for tunes that do emit the native framing.
+
+```typescript
+import type { ToolDialect } from "@qvac/sdk";
+```
+
+---
+
diff --git a/packages/sdk/changelog/0.10.0/breaking.md b/packages/sdk/changelog/0.10.0/breaking.md
new file mode 100644
index 0000000000..735e816d6b
--- /dev/null
+++ b/packages/sdk/changelog/0.10.0/breaking.md
@@ -0,0 +1,278 @@
+# 💥 Breaking Changes v0.10.0
+
+## Add parallel orchestration, download dedupe, and generic companion-set support
+
+PR: [#1636](https://github.com/tetherto/qvac/pull/1636)
+
+**BEFORE:**
+
+
+```typescript
+onProgress: (progress) => {
+  if (progress.onnxInfo) {
+    console.log(
+      `[${progress.onnxInfo.currentFile}] ` +
+      `file ${progress.onnxInfo.fileIndex}/${progress.onnxInfo.totalFiles} — ` +
+      `${progress.onnxInfo.overallPercentage.toFixed(1)}% overall`
+    );
+  }
+}
+```
+
+
+
+**AFTER:**
+
+
+```typescript
+onProgress: (progress) => {
+  if (progress.fileSetInfo) {
+    console.log(
+      `[${progress.fileSetInfo.currentFile}] ` +
+      `file ${progress.fileSetInfo.fileIndex}/${progress.fileSetInfo.totalFiles} — ` +
+      `${progress.fileSetInfo.overallPercentage.toFixed(1)}% overall`
+    );
+  }
+}
+```
+
+## 🔌 Extensibility: adding a new companion format
+
+The companion pipeline is generic. Only `companions.ts` contains format-specific detection (currently ONNX). To add a new format:
+
+1. Add a detection function in `companions.ts`
+2. Call it from `groupCompanionSets`
+
+Everything downstream (codegen, resolver, cache probing, storage cleanup) handles it automatically.
+
+## 🧪 How was it tested?
+
+- Unit tests for `resolveClearStorageTarget` — companion set paths, legacy ONNX paths, flat cache, outside-cache paths, trailing slashes, Windows backslash paths
+- Unit tests for `groupCompanionSets` — ONNX + `_data`/`.data` patterns, non-ONNX models, deterministic `setKey` generation
+- **Companion set smoke test** — ran Parakeet CTC and TDT end-to-end, validated all 4 cache paths (legacy `_data` probe, legacy `.data` probe, canonical fresh download, canonical cache hit) with correct transcription output on each
+- **Parallel orchestration**: ran `examples/llamacpp-multimodal.ts` with labeled progress — confirmed primary and projection models download concurrently via interleaved output
+- **Profiling**: ran `examples/profiling/basic.ts` — confirmed `sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`, `modelInitializationTime`, `checksumValidationTime` populate correctly through `buildDownloadProfilingFields()`. Ran `examples/llamacpp-multimodal.ts` with profiling enabled — confirmed aggregate stats merge correctly across primary and projection downloads
+- **Cancellation**: `^C` during multimodal download cleanly aborts both active transfers with no leaked state
+- Build, lint, and typecheck pass
+
+---
+
+## Unified CompletionEvent stream as canonical completion API
+
+PR: [#1673](https://github.com/tetherto/qvac/pull/1673)
+
+**BEFORE:**
+
+
+```typescript
+// Wire response
+{ type: "completionStream", token: "Hello", toolCallEvent: {...} }
+{ type: "completionStream", token: "", done: true, stats: {...}, toolCalls: [...] }
+```
+
+
+
+**AFTER:**
+
+
+```typescript
+// Wire response
+{ type: "completionStream", events: [{ type: "contentDelta", seq: 0, text: "Hello" }] }
+{ type: "completionStream", done: true, events: [
+  { type: "completionStats", seq: 5, stats: {...} },
+  { type: "completionDone", seq: 6, raw: { fullText: "..." } }
+]}
+```
+
+**Client API**: `completion()` return type is now `CompletionRun` (was anonymous object). Legacy fields still work but are derived views.
+
+**BEFORE:**
+
+```typescript
+const result = completion({ modelId, history, stream: true });
+for await (const token of result.tokenStream) { ... }
+const stats = await result.stats;
+```
+
+**AFTER:**
+
+```typescript
+const run = completion({ modelId, history, stream: true, captureThinking: true });
+for await (const event of run.events) {
+  if (event.type === "contentDelta") process.stdout.write(event.text);
+  if (event.type === "toolCall") console.log(event.call.name);
+}
+const result = await run.final;
+// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText
+```
+
+## 🧪 How was it tested?
+
+- **Unit tests**: event schema validation and wire strictness, normalizer state machine (content, thinking, tool framing, fail-open, error-finish, scoped dedupe), and client-side event aggregation with error-done rejection
+- **Manual**: ran `examples/completion-events.ts` (new event-driven API) and existing legacy examples — both produce correct output
+- Build and typecheck passes
+
+---
+
+## Migrate SDK plugins to new addon constructor shape
+
+PR: [#1688](https://github.com/tetherto/qvac/pull/1688)
+
+**BEFORE:**
+
+
+```typescript
+export const myPlugin = definePlugin({
+  // ...
+  createModel(params: CreateModelParams): PluginModelResult {
+    return { model, loader: null };
+  },
+});
+```
+
+
+**AFTER:**
+
+_No AFTER snippet was captured for this entry; see PR #1688 for the new addon constructor shape._
+
+---
+
+## Switch delegation to direct DHT connect, drop topic end-to-end
+
+PR: [#1729](https://github.com/tetherto/qvac/pull/1729)
+
+**BEFORE:**
+
+- Consumer: `swarm.join(topic)` → `swarm.flush()` → wait for `connection` event matching `peerPublicKey` → filter out everyone else.
+- Provider: `swarm.join(topic, { server: true })` → `discovery.flushed()` → `swarm.flush()` (full topic announce on the DHT).
+
+
+
+**AFTER:**
+
+- Consumer: `swarm.dht.connect(publicKey)` — direct connection, no discovery, no filtering.
+- Provider: `swarm.listen()` — binds the DHT server on the keyPair so consumers can reach it via `dht.connect(publicKey)`. No topic announce.
+
+---
+
+## Improve model type & capability system
+
+PR: [#1748](https://github.com/tetherto/qvac/pull/1748)
+
+**BEFORE:**
+
+
+```typescript
+import type { LoadModelOptions } from "@qvac/sdk";
+
+const opts: LoadModelOptions = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+```
+
+
+
+**AFTER:**
+
+
+```typescript
+import type { LoadCustomPluginModelOptions } from "@qvac/sdk";
+
+// Generic must be supplied; it pins the literal plugin string.
+const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = {
+  modelSrc: "/path/foo",
+  modelType: "my-custom-plugin",
+  modelConfig: { whatever: 1 },
+};
+await loadModel(opts);
+
+// Or just drop the annotation — TS picks the right overload.
+```
+
+### Wrong-model error code/message change (runtime)
+
+Built-in SDK operations now surface `MODEL_OPERATION_NOT_SUPPORTED` instead of `PLUGIN_HANDLER_NOT_FOUND`. Low-level `pluginInvoke` / `pluginInvokeStream` still use `PLUGIN_HANDLER_NOT_FOUND`.
+
+**BEFORE:**
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.PLUGIN_HANDLER_NOT_FOUND) {
+    /* ... */
+  }
+}
+```
+
+**AFTER:**
+
+```typescript
+import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk";
+
+try {
+  await transcribe({ modelId: llmModelId /* ... */ });
+} catch (e) {
+  if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) {
+    // Message includes the requested operation, the loaded model type,
+    // supported operations on the loaded model, and suggested model types.
+  }
+}
+```
+
+`PLUGIN_HANDLER_NOT_FOUND` is still the low-level path for `pluginInvoke` / `pluginInvokeStream`.
+
+### `translate(...)` validates caller-supplied `modelType` against loaded type (runtime)
+
+`translate(...)` now routes by the loaded model's registered type. A mismatched caller-supplied `modelType` throws `ModelTypeMismatchError` instead of being silently mis-routed.
+
+**BEFORE:**
+
+```typescript
+// Loaded an NMT model, but called translate with modelType: "llm".
+// Worker routed to the NMT plugin (modelId-based) but treated the input as LLM-style. Confusing failures.
+await translate({ modelId: nmtModelId, modelType: "llm", text: "..." });
+```
+
+**AFTER:**
+
+```typescript
+// Drop modelType; the registered type drives behavior.
+await translate({ modelId: nmtModelId, text: "..." });
+
+// Or keep it — but it must match the loaded type, otherwise:
+//   ModelTypeMismatchError: expected "nmtcpp-translation", got "llamacpp-completion"
+await translate({ modelId: nmtModelId, modelType: "nmt", text: "..." });
+```
+
+## 🔌 API Changes
+
+```typescript
+import { getLoadedModelInfo, transcribe } from "@qvac/sdk";
+
+// Introspect a loaded modelId (local or delegated). Discriminated on `isDelegated`.
+const info = await getLoadedModelInfo({ modelId });
+
+// Preflight a built-in SDK call before sending the RPC.
+// Local: handlers + modelType are authoritative.
+// Delegated: handlers is [] and preflight defers to the provider.
+if (info.isDelegated || info.handlers.includes("transcribeStream")) {
+  await transcribe({ modelId /* ... */ });
+}
+
+if (!info.isDelegated) {
+  // info.modelType, info.loadedAt
+  // info.displayName?, info.addonPackage?  (from the plugin)
+  // info.name?, info.path?                 (from the model file)
+}
+
+// Throws ModelNotFoundError if modelId isn't loaded.
+```
+
+---
+
diff --git a/packages/sdk/changelog/0.10.0/models.md b/packages/sdk/changelog/0.10.0/models.md
new file mode 100644
index 0000000000..fe7acc2b40
--- /dev/null
+++ b/packages/sdk/changelog/0.10.0/models.md
@@ -0,0 +1,20 @@
+# 📦 Model Changes v0.10.0
+
+## Added Models
+
+```
+NMT_Q0F16 through NMT_Q0F16_9
+NMT_Q4_0 through NMT_Q4_0_12+
+```
+
+## Removed Models
+
+```
+MARIAN_OPUS_*
+```
+
+---
+
+### Related PRs
+
+- [#1700](https://github.com/tetherto/qvac/pull/1700) - Regenerate model registry with companion-set metadata
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index f241b7637e..e54eb963cf 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@qvac/sdk",
-  "version": "0.9.1",
+  "version": "0.10.0",
   "license": "Apache-2.0",
   "repository": {
     "type": "git",
diff --git a/scripts/sdk/generate-changelog-sdk-pod.cjs b/scripts/sdk/generate-changelog-sdk-pod.cjs
index da3ec14151..363464ecd1 100644
--- a/scripts/sdk/generate-changelog-sdk-pod.cjs
+++ b/scripts/sdk/generate-changelog-sdk-pod.cjs
@@ -33,6 +33,54 @@ const SECTIONS = [
   { key: "infra", title: "⚙️ Infrastructure" },
 ];
 
+/**
+ * Maximum number of model entries to inline per section (Added/Updated/Removed)
+ * in the main CHANGELOG.md. Entries beyond the cap are collapsed into a single
+ * "(and N more)" suffix; readers follow the link to models.md for the rest.
+ */
+const MAX_INLINE_MODELS = 5;
+
+/**
+ * Maximum number of bullets to include per section in the Slack announcement
+ * post (announcement-post.txt). The cap is inclusive: a section with 10 or
+ * fewer entries is emitted verbatim; only a section with *more than 10*
+ * entries is truncated to 10 and closed with
+ * "... And much more, see full list in changelog :memo:".
+ */
+const MAX_ANNOUNCEMENT_BULLETS = 10;
+
+/**
+ * Map of section keys (e.g. `infra`, matching the `key` field in SECTIONS) to
+ * the Slack-style headings used in the announcement-post.txt template. Slack
+ * does not render the unicode emojis of CHANGELOG.md, hence the shortcodes.
+ */
+const SLACK_SECTION_HEADINGS = {
+  feat: ":sparkles: Features",
+  api: ":electric_plug: API",
+  fix: ":ladybug: Fixes",
+  mod: ":package: Models",
+  doc: ":blue_book: Docs",
+  test: ":test_tube: Tests",
+  chore: ":broom: Chores",
+  infra: ":gear: Infrastructure",
+};
+
+/**
+ * Map from the emoji that opens a CHANGELOG.md section heading (e.g. the one
+ * in "⚙️ Infrastructure") back to the internal section key, so the announcement
+ * generator can parse a written CHANGELOG.md without re-running the PR fetch.
+ */
+const CHANGELOG_HEADING_TO_KEY = {
+  "✨": "feat",
+  "🔌": "api",
+  "🐞": "fix",
+  "📦": "mod",
+  "📘": "doc",
+  "🧪": "test",
+  "🧹": "chore",
+  "⚙️": "infra",
+};
+
 /**
  * Extract code blocks from markdown
  * @param {string} text
@@ -302,6 +350,105 @@ function capitalize(str) {
   return str.charAt(0).toUpperCase() + str.slice(1);
 }
 
+/**
+ * Decide whether a Models-section line describes a companion artifact rather
+ * than a first-class model. Companion artifacts (vocab files, lexicons, raw
+ * data shards, metadata blobs, ...) ship next to a primary model but cannot
+ * be used on their own, so the changelog and the announcement post leave
+ * them out.
+ *
+ * A line counts as a companion when either of these holds (case-insensitive):
+ *   - it contains the text "companion" anywhere
+ *   - it contains `_LEX`, `_VOCAB`, `_DATA` or `_METADATA` immediately
+ *     followed by a word boundary (end of line, space, punctuation, ...)
+ *
+ * An underscore is a word character, so infix forms such as `FOO_DATA_FP32`
+ * are not matched by the suffix rule.
+ *
+ * @param {string} entry - One Added/Updated/Removed list line
+ * @returns {boolean} `false` for empty/missing input
+ */
+function isCompanionEntry(entry) {
+  if (!entry) return false;
+  const companionPattern = /companion|_(?:lex|vocab|data|metadata)\b/i;
+  return companionPattern.test(entry);
+}
+
+/**
+ * Strip "(N entries)" / "(N entries — …)" count annotations — including the
+ * singular "(1 entry)" — from a free-form Models line (e.g. PR #1700-style
+ * summaries). Only whitespace *before* the annotation is consumed, so text
+ * on either side of a mid-line count is not glued together.
+ *
+ * @param {string} entry - One Added/Updated/Removed list line
+ * @returns {string} Trimmed entry; falsy input is returned unchanged
+ */
+function stripEntryCount(entry) {
+  if (!entry) return entry;
+  return entry
+    .replace(/\s*\(\s*\d+\s*entr(?:y|ies)(?:\s*[—–-][^)]*)?\)/gi, "")
+    .trim();
+}
+
+/**
+ * Changelog policy for model lists: skip companions, strip entry counts.
+ * @param {string[]} entries - Raw Added/Updated/Removed lines
+ * @returns {string[]} Non-empty cleaned lines, original order preserved
+ */
+function cleanModelEntries(entries) {
+  const kept = [];
+  for (const raw of entries || []) {
+    if (isCompanionEntry(raw)) continue;
+    const stripped = stripEntryCount(raw);
+    if (stripped && stripped.length > 0) kept.push(stripped);
+  }
+  return kept;
+}
+
+/**
+ * Render one model section (Added / Updated / Removed) as a single
+ * "<label>: a, b, c" line for the main CHANGELOG.md.
+ * The names are first passed through `cleanModelEntries` (companions
+ * dropped, entry counts stripped). At most MAX_INLINE_MODELS of the
+ * remaining names are listed; any overflow is reported as a trailing
+ * " (and N more)".
+ *
+ * @param {string} label - e.g. "Added", "Updated", "Removed"
+ * @param {string[]} names - Raw entries for this section
+ * @returns {string|null} The summary line, or null when nothing remains
+ */
+function summarizeModelList(label, names) {
+  const entries = cleanModelEntries(names);
+  if (entries.length === 0) return null;
+  const visible = entries.slice(0, MAX_INLINE_MODELS);
+  const hiddenCount = entries.length - visible.length;
+  const overflow = hiddenCount > 0 ? ` (and ${hiddenCount} more)` : "";
+  return `${label}: ${visible.join(", ")}${overflow}`;
+}
+
+/**
+ * Collect the Added / Updated / Removed summary lines for a PR body, in that
+ * order, skipping sections that are empty after companion/entry-count
+ * filtering. The caller indents the lines under the CHANGELOG.md bullet.
+ * @param {string} prBody
+ * @returns {string[]|null} null when there is no Models section or no lines
+ */
+function buildInlineModelSummary(prBody) {
+  const sections = extractModelsSection(prBody);
+  if (!sections) return null;
+  const lines = [];
+  for (const [label, names] of [
+    ["Added", sections.added],
+    ["Updated", sections.updated],
+    ["Removed", sections.removed],
+  ]) {
+    const line = summarizeModelList(label, names);
+    if (line) lines.push(line);
+  }
+  if (lines.length === 0) return null;
+  return lines;
+}
+
 /**
  * Generate changelog entry
  * @param {object} pr
@@ -337,6 +484,20 @@ function generateChangelogEntry(
     entry += ` - See ${links.join(", ")}`;
   }
 
+  // For [mod] PRs, append the trimmed Added/Updated/Removed model lists as
+  // indented continuation lines under the bullet. Companions and entry-count
+  // suffixes are filtered out by `buildInlineModelSummary`, so what shows up
+  // here are first-class model identities only — the full list (including
+  // companions, if the PR author chose to keep them) lives in models.md.
+  if (parsed.tags.includes("mod")) {
+    const modelLines = buildInlineModelSummary(pr.body);
+    if (modelLines) {
+      for (const line of modelLines) {
+        entry += `\n  ${line}`;
+      }
+    }
+  }
+
   return entry;
 }
 
@@ -486,10 +647,12 @@ function generateChangelogFiles(packageName, version, prs, outputDir, baseRef) {
       }
     }
 
-    // Sort alphabetically
-    const addedList = [...allAdded].sort();
-    const updatedList = [...allUpdated].sort();
-    const removedList = [...allRemoved].sort();
+    // Apply the changelog model-section policy: drop companions, strip
+    // entry-count suffixes. Keeps models.md aligned with what the main
+    // CHANGELOG.md surfaces — only first-class model identities.
+    const addedList = cleanModelEntries([...allAdded].sort());
+    const updatedList = cleanModelEntries([...allUpdated].sort());
+    const removedList = cleanModelEntries([...allRemoved].sort());
 
     let modelsMd = `# 📦 Model Changes v${version}\n\n`;
 
@@ -605,6 +768,25 @@ function groupModelsByPrefix(names) {
   return groups;
 }
 
+/**
+ * Tell whether a PR subject denotes a backmerge.
+ *
+ * A backmerge brings a release branch back into main; its changes are already
+ * covered by that branch's own changelog, so repeating them here is noise.
+ * Case-insensitive, anchored at the start of the trimmed subject: matches
+ * `backmerge...` (QVAC convention, `Backmerge release sdk 0.9.1`) and
+ * `merge release` + whitespace/hyphen (`Merge release-sdk-0.9.1 into main`).
+ *
+ * @param {string} subject - PR subject (after prefix/tags)
+ * @returns {boolean} False for an empty or missing subject.
+ */
+function isBackmergeSubject(subject) {
+  if (!subject) return false;
+  const normalized = subject.trim().toLowerCase();
+  const hasBackmergePrefix = normalized.startsWith("backmerge");
+  return hasBackmergePrefix || /^merge\s+release[\s-]/.test(normalized);
+}
+
 /**
  * Process raw PRs with SDK-specific validation and filtering
  * @param {Array<{number: number, title: string, body: string, url: string}>} rawPRs
@@ -631,6 +813,13 @@ function processSDKPRs(rawPRs) {
       continue;
     }
 
+    if (isBackmergeSubject(validation.parsed.subject)) {
+      console.log(
+        `  ⏭️  PR #${pr.number} is a backmerge, excluding from changelog`,
+      );
+      continue;
+    }
+
     prs.push({
       number: pr.number,
       title: pr.title,
@@ -726,6 +915,252 @@ function rebuildRootChangelog(packageName) {
   );
 }
 
+/**
+ * Parse a generated CHANGELOG.md into structured sections with bullet entries.
+ * Used by the announcement-post generator so it can transform the canonical
+ * release changelog without re-fetching PRs from GitHub.
+ *
+ * @param {string} markdown - Contents of CHANGELOG.md
+ * @returns {{
+ *   version: string|null,
+ *   releaseDate: string|null,
+ *   sections: Array<{ key: string, heading: string, bullets: Array<{
+ *     text: string,
+ *     prNumber: string|null,
+ *     prUrl: string|null,
+ *     isBreaking: boolean,
+ *     isApi: boolean,
+ *     isModels: boolean,
+ *     continuation: string[],
+ *   }> }>,
+ * }}
+ */
+function parseChangelogMarkdown(markdown) {
+  const out = { version: null, releaseDate: null, sections: [] };
+
+  const versionMatch = markdown.match(/^# Changelog v(\d+\.\d+\.\d+)/m);
+  if (versionMatch) out.version = versionMatch[1];
+
+  const dateMatch = markdown.match(/^Release Date:\s*(\S+)/m);
+  if (dateMatch) out.releaseDate = dateMatch[1];
+
+  const lines = markdown.split("\n");
+  let current = null;
+  let currentBullet = null;
+
+  for (const rawLine of lines) {
+    const line = rawLine.replace(/\r$/, "");
+
+    // ## <emoji> <Title>
+    const headingMatch = line.match(/^##\s+(\S+)\s+(.+?)\s*$/);
+    if (headingMatch) {
+      const [, emoji, title] = headingMatch;
+      const key = CHANGELOG_HEADING_TO_KEY[emoji];
+      if (key) {
+        current = { key, heading: `${emoji} ${title}`, bullets: [] };
+        out.sections.push(current);
+        currentBullet = null;
+        continue;
+      }
+      // Unknown heading — close the current section so stray bullets don't
+      // get attached to a previous, unrelated section.
+      current = null;
+      currentBullet = null;
+      continue;
+    }
+
+    if (!current) continue;
+
+    if (line.startsWith("- ")) {
+      // New bullet; flush the running bullet reference.
+      const prMatch = line.match(/\(see PR \[#(\d+)\]\(([^)]+)\)\)/);
+      const prNumber = prMatch ? prMatch[1] : null;
+      const prUrl = prMatch ? prMatch[2] : null;
+
+      let text = line.slice(2);
+      if (prMatch) {
+        text = text.slice(0, prMatch.index - 2).trim(); // -2: "- " already cut
+      }
+      text = text.replace(/\s*-\s*See\s+\[.*$/, "").trim();
+      text = text.replace(/\.+$/, "").trim();
+
+      const linkSuffix = line.slice(
+        prMatch ? prMatch.index + prMatch[0].length : 0,
+      );
+      const isBreaking = /\[breaking changes\]/i.test(linkSuffix);
+      const isApi = /\[API changes\]/i.test(linkSuffix);
+      const isModels = /\[model changes\]/i.test(linkSuffix);
+
+      currentBullet = {
+        text,
+        prNumber,
+        prUrl,
+        isBreaking,
+        isApi,
+        isModels,
+        continuation: [],
+      };
+      current.bullets.push(currentBullet);
+      continue;
+    }
+
+    // Indented continuation line (markdown convention: 2+ leading spaces or
+    // tab under the bullet). Preserve trimmed content so the announcement
+    // formatter can re-indent for Slack.
+    if (currentBullet && /^(\s{2,}|\t)/.test(line) && line.trim().length > 0) {
+      currentBullet.continuation.push(line.trim());
+      continue;
+    }
+
+    // A blank line closes the running bullet; other unmatched lines are ignored.
+    if (line.trim().length === 0) {
+      currentBullet = null;
+    }
+  }
+
+  return out;
+}
+
+/**
+ * Render one parsed changelog bullet as Slack announcement text.
+ *
+ * The first row is `• <text>`, followed by ` (<prUrl>)` when a PR URL is
+ * present and ` :boom: breaking` when the bullet is flagged as breaking.
+ * Each continuation entry (e.g. `Added: …` / `Removed: …` for [mod] PRs)
+ * becomes its own row below it, indented by two spaces.
+ *
+ * @param {{
+ *   text: string,
+ *   prUrl: string|null,
+ *   isBreaking: boolean,
+ *   continuation?: string[],
+ * }} bullet
+ * @returns {string} Rows joined with "\n", without a trailing newline.
+ */
+function formatAnnouncementBullet(bullet) {
+  const { text, prUrl, isBreaking, continuation } = bullet;
+  const linkPart = prUrl ? ` (<${prUrl}>)` : "";
+  const breakingPart = isBreaking ? ` :boom: breaking` : "";
+
+  const rows = [`• ${text}${linkPart}${breakingPart}`];
+  if (continuation && continuation.length > 0) {
+    for (const extra of continuation) rows.push(`  ${extra}`);
+  }
+  return rows.join("\n");
+}
+
+/**
+ * Generate `announcement-post.txt` from a per-version CHANGELOG.md.
+ *
+ * The output is plaintext sized to be copy-pasted into Slack: shortcode
+ * emojis, `<url>` wrapping on bullet PR links, one row per bullet plus any
+ * indented continuation rows, sections capped at MAX_ANNOUNCEMENT_BULLETS
+ * with an "... And much more" line when truncated.
+ *
+ * @param {string} packageName
+ * @param {string} version
+ * @returns {string|null} The output path on success, or null if CHANGELOG.md
+ *   for the requested version wasn't found.
+ */
+function generateAnnouncementPost(packageName, version) {
+  const repoRoot = getRepoRoot();
+  const versionDir = path.join(
+    repoRoot,
+    "packages",
+    packageName,
+    "changelog",
+    version,
+  );
+  const changelogPath = path.join(versionDir, "CHANGELOG.md");
+
+  if (!fs.existsSync(changelogPath)) {
+    console.warn(`⚠️ No CHANGELOG.md found at ${changelogPath}`);
+    return null;
+  }
+
+  const markdown = fs.readFileSync(changelogPath, "utf8");
+  const parsed = parseChangelogMarkdown(markdown);
+  const releaseDate =
+    parsed.releaseDate || new Date().toISOString().split("T")[0];
+
+  const repoUrl = "https://github.com/tetherto/qvac";
+  const npmName = `@qvac/${packageName}`;
+  const tagName = `${packageName}-v${version}`;
+  const changelogTreeUrl = `${repoUrl}/tree/main/packages/${packageName}/changelog/${version}`;
+  const breakingMdUrl = `${repoUrl}/blob/main/packages/${packageName}/changelog/${version}/breaking.md`;
+  const releaseTagUrl = `${repoUrl}/releases/tag/${tagName}`;
+  const npmUrl = `https://www.npmjs.com/package/${npmName}/v/${version}`;
+
+  const hasBreaking = parsed.sections.some((s) =>
+    s.bullets.some((b) => b.isBreaking),
+  );
+
+  let post = "";
+
+  // Header
+  post += `:qvac: SDK ${version} :rocket: NPM Public release\n\n`;
+
+  // Links
+  post += `:package: NPM: ${npmUrl}\n`;
+  post += `:technologist: Github release: ${releaseTagUrl}\n`;
+  post += `:page_facing_up: Full Changelog: ${changelogTreeUrl}\n\n`;
+
+  // Breaking
+  if (hasBreaking) {
+    post += `:warning: Breaking Changes\n`;
+    post += `See full migration guide: ${breakingMdUrl}\n\n`;
+  }
+
+  post += `Release Date: ${releaseDate}\n\n`;
+
+  // Sections — preserve the canonical order from SECTIONS, render only the
+  // ones that have bullets in the parsed changelog.
+  for (const section of SECTIONS) {
+    const heading = SLACK_SECTION_HEADINGS[section.key];
+    if (!heading) continue;
+
+    const matched = parsed.sections.find((s) => s.key === section.key);
+    if (!matched || matched.bullets.length === 0) continue;
+
+    post += `${heading}\n`;
+
+    const shown = matched.bullets.slice(0, MAX_ANNOUNCEMENT_BULLETS);
+    for (const bullet of shown) {
+      post += formatAnnouncementBullet(bullet) + "\n";
+    }
+
+    if (matched.bullets.length > MAX_ANNOUNCEMENT_BULLETS) {
+      post += `... And much more, see full list in changelog :memo:\n`;
+    }
+
+    post += "\n";
+  }
+
+  post += `Thanks to everyone on QVAC team :green_heart: :qvac: :green_heart:\n`;
+
+  const outPath = path.join(versionDir, "announcement-post.txt");
+  fs.writeFileSync(outPath, post);
+  console.log(`✅ Generated ${outPath}`);
+  return outPath;
+}
+
+/**
+ * Read the `version` field from `packages/<packageName>/package.json`,
+ * resolved against the path returned by `getRepoRoot()`.
+ *
+ * Errors from reading or JSON-parsing the manifest are not caught here and
+ * propagate to the caller.
+ *
+ * @param {string} packageName
+ * @returns {string}
+ */
+function readPackageVersion(packageName) {
+  const packageDir = path.join(getRepoRoot(), "packages", packageName);
+  const manifestPath = path.join(packageDir, "package.json");
+  const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
+  return manifest.version;
+}
+
 /**
  * Main function
  */
@@ -742,6 +1177,17 @@ async function main() {
     process.exit(0);
   }
 
+  if ("generate-announcement-post" in params) {
+    if (!params.package) {
+      console.error("--package is required with --generate-announcement-post");
+      process.exit(1);
+    }
+
+    const version = params.version || readPackageVersion(params.package);
+    const out = generateAnnouncementPost(params.package, version);
+    process.exit(out ? 0 : 1);
+  }
+
   if (!params.package) {
     console.error("Usage:");
     console.error(
@@ -755,7 +1201,13 @@ async function main() {
     );
     console.error("  --base-version   Version label for base commit");
     console.error("  --release-type   minor or patch (auto-detected from package.json version)");
-    console.error("  --update-root-changelog  Update root CHANGELOG.md");
+    console.error("  --update-root-changelog       Update root CHANGELOG.md");
+    console.error(
+      "  --generate-announcement-post  Generate announcement-post.txt for the package's current version",
+    );
+    console.error(
+      "  --version                     Override version when used with --generate-announcement-post",
+    );
     process.exit(1);
   }
 
@@ -821,5 +1273,7 @@ module.exports = {
   generateChangelogEntry,
   generateChangelogFiles,
   processSDKPRs,
+  parseChangelogMarkdown,
+  generateAnnouncementPost,
   SECTIONS,
 };