From e7603fdb62e8f8bd818326b77501181b745a5a51 Mon Sep 17 00:00:00 2001 From: namelsking Date: Fri, 1 May 2026 16:09:16 +0200 Subject: [PATCH] feat[skiplog]: improve sdk-changelog tooling and add 0.10.0 release notes Tooling (scripts/sdk/generate-changelog-sdk-pod.cjs): - Backmerge filter: PRs whose subject starts with `Backmerge` or `Merge release ...` are skipped during processSDKPRs (same shape as the existing [skiplog] filter). - Companion filter + entry-count strip: new isCompanionEntry, stripEntryCount, cleanModelEntries helpers applied to the inline [mod] summary in CHANGELOG.md and the body of models.md. Recognises *_LEX / *_VOCAB / *_DATA / *_METADATA constant suffixes and any line containing the word "companion". - Indented continuation lines for [mod] PRs: Added/Updated/Removed are emitted as indented sub-rows under the bullet (capped at MAX_INLINE_MODELS = 5 per section, "(and N more)" for the rest) instead of stuffed inline. - Announcement-post generator: new --generate-announcement-post CLI flag (with optional --version) parses CHANGELOG.md via parseChangelogMarkdown and emits the Slack template (:qvac: header, NPM/GitHub/changelog links, conditional :warning: Breaking Changes, per-section bullets with link wrapping and :boom: breaking markers, footer). Sections cap at MAX_ANNOUNCEMENT_BULLETS = 10 with "... And much more, see full list in changelog :memo:" only when strictly more than 10. - New helpers exported: parseChangelogMarkdown, generateAnnouncementPost. Skill (.cursor/skills/sdk-changelog/SKILL.md): - Step 4 (CHANGELOG_LLM.md) is now mandatory. - New Step 5: generate announcement-post.txt (mandatory) with the gitignore note and template spec. - NOTICE renumbered to Step 6. - Documented all new policies (backmerge, companion, entry-count strip, indentation, max-bullets cap). - CLI parameters table refreshed. .gitignore: - Added packages/*/changelog/*/announcement-post.txt. 
The post is a Slack copy-paste working artifact, not a release deliverable. Release notes for 0.10.0: - New packages/sdk/changelog/0.10.0/ folder with CHANGELOG.md, breaking.md, api.md, models.md, CHANGELOG_LLM.md. - Root aggregate packages/sdk/CHANGELOG.md rebuilt with v0.10.0 at top. - packages/sdk/NOTICE refreshed (191 models, 179 JS deps). - packages/sdk/package.json bumped 0.9.1 -> 0.10.0. Backmerge of release-sdk-0.10.0 -> main is a no-op for the release artifacts (changelog, NOTICE) because they land here directly. --- .cursor/skills/sdk-changelog/SKILL.md | 149 +++++- .gitignore | 6 + packages/sdk/CHANGELOG.md | 444 ++++++++++++++++ packages/sdk/NOTICE | 69 +-- packages/sdk/changelog/0.10.0/CHANGELOG.md | 63 +++ .../sdk/changelog/0.10.0/CHANGELOG_LLM.md | 443 ++++++++++++++++ packages/sdk/changelog/0.10.0/api.md | 501 ++++++++++++++++++ packages/sdk/changelog/0.10.0/breaking.md | 278 ++++++++++ packages/sdk/changelog/0.10.0/models.md | 20 + packages/sdk/package.json | 2 +- scripts/sdk/generate-changelog-sdk-pod.cjs | 464 +++++++++++++++- 11 files changed, 2376 insertions(+), 63 deletions(-) create mode 100644 packages/sdk/changelog/0.10.0/CHANGELOG.md create mode 100644 packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md create mode 100644 packages/sdk/changelog/0.10.0/api.md create mode 100644 packages/sdk/changelog/0.10.0/breaking.md create mode 100644 packages/sdk/changelog/0.10.0/models.md diff --git a/.cursor/skills/sdk-changelog/SKILL.md b/.cursor/skills/sdk-changelog/SKILL.md index 126b51ca64..721741c333 100644 --- a/.cursor/skills/sdk-changelog/SKILL.md +++ b/.cursor/skills/sdk-changelog/SKILL.md @@ -21,6 +21,9 @@ Generate changelogs for SDK pod packages following the monorepo GitFlow. ## Workflow +Every step is mandatory. Do **not** ask the user whether to do `CHANGELOG_LLM.md` or +`NOTICE` — they are part of this skill and always run. 
+ ### Step 1: Identify Target Package If the user doesn't specify, ask which SDK pod package they want to generate a changelog for. @@ -56,20 +59,126 @@ With migration flags: node scripts/sdk/generate-changelog-sdk-pod.cjs --package= --base-commit= --base-version= ``` -### Step 4: Generate CHANGELOG_LLM.md (if requested) +The script automatically excludes: + +- PRs tagged `[skiplog]`. +- **Backmerge PRs** (subjects starting with `Backmerge` or `Merge release …`). + Backmerges merge a release branch back into main; their content is already + documented in the release branch's own changelog, so listing them here is noise. +- PRs whose title fails the SDK PR-format validator (these are warned, not silently + dropped — fix the title and re-run, or surface to the PR author). + +For `[mod]` PRs, the script extracts the `Added`/`Updated`/`Removed` model lists +from the PR body and renders them as **indented continuation lines beneath the +bullet** in `CHANGELOG.md` (each section on its own line — never inline as one +giant row). The same filtered lists are written to `models.md`. + +The extractor applies two policies (in this order): + +1. **Companion entries are dropped.** Companions are auxiliary files that ship + alongside a primary model but aren't independently usable — vocab files, + lexicons, raw data shards, metadata blobs. The filter recognises constant + suffixes (`*_LEX`, `*_VOCAB`, `*_DATA`, `*_METADATA`) **and** any free-form + description containing the word "companion". Only first-class models reach + the changelog. +2. **Entry-count suffixes are stripped.** `(N entries)` / + `(N entries — short note)` decorations are removed from the displayed + text — readers can follow the `models.md` link for exact counts. + +After both filters, each section is trimmed to `MAX_INLINE_MODELS` (currently +**5**) entries, with `(and N more)` for the remainder. Example: + +``` +- Regenerate model registry. 
(see PR [#123](...)) - See [model changes](./models.md) + Added: NMT_Q0F16, NMT_Q4_0 (and 12 more) + Removed: MARIAN_OPUS_* +``` + +If a section is empty after filtering, it is omitted. If all sections are empty, +the bullet is emitted with no continuation lines. + +When writing the human-readable `CHANGELOG_LLM.md` (Step 4), apply the same +"no informational value" rule manually: skip backmerges, automated bumps, and any +entry whose subject would just repeat what a previous release already said. For +the Models section, mirror the script's policy — keep it concise in the body +(highlight the most notable adds/removes) and defer the full constant list to +the `### Added` / `### Removed` blocks at the bottom. + +### Step 4: Generate CHANGELOG_LLM.md (mandatory) + +Always run this step. Do not ask the user — it's part of the skill. + +After raw changelog files exist, generate the human-readable version at +`packages/<package>/changelog/<version>/CHANGELOG_LLM.md`. -After raw changelog files exist, generate the human-readable version. See [references/changelog-llm-format.md](references/changelog-llm-format.md) for the format guide. +After writing the file, re-run the raw generator (or rebuild the root aggregate) so +`packages/<package>/CHANGELOG.md` picks up the new `CHANGELOG_LLM.md` (the aggregator +prefers it over `CHANGELOG.md`). Easiest way: re-run the script from Step 3 — it's idempotent. + +### Step 5: Generate `announcement-post.txt` (mandatory) + +Always run this step after Step 4. It produces a Slack-ready copy-paste post at +`packages/<package>/changelog/<version>/announcement-post.txt`. + +The file is **gitignored** (`packages/*/changelog/*/announcement-post.txt`) — it's a +local working artifact, not a committed deliverable. Never `git add` it. 
+ +```bash +node scripts/sdk/generate-changelog-sdk-pod.cjs --package=<package> --generate-announcement-post +``` + +The script parses `CHANGELOG.md` for the package's current version (from +`package.json`) and emits the Slack template: + +- `:qvac: SDK :rocket: NPM Public release` header. +- NPM, GitHub release, and full-changelog tree links. +- `:warning: Breaking Changes` section (with link to `breaking.md`) — only if any + PR is breaking. +- `Release Date: YYYY-MM-DD`. +- One Slack section per CHANGELOG.md section (`:sparkles: Features`, + `:electric_plug: API`, `:ladybug: Fixes`, `:package: Models`, `:blue_book: Docs`, + `:test_tube: Tests`, `:broom: Chores`, `:gear: Infrastructure`). +- Each bullet uses `•`, wraps the PR URL in `<...>` (suppresses Slack unfurl), and + appends ` :boom: breaking` when the bullet is breaking. +- Sections are capped at `MAX_ANNOUNCEMENT_BULLETS` (currently **10**). The + `... And much more, see full list in changelog :memo:` line is only added + when a section has *more than 10* entries; anything 10 or fewer is emitted + verbatim. +- Footer: `Thanks to everyone on QVAC team :green_heart: :qvac: :green_heart:`. + +If the post needs hand-tuning (e.g. the Models section needs custom count summaries +that the parser can't infer), edit the file directly. It's gitignored, so changes +won't pollute the diff. + +### Step 6: Update NOTICE file for the target package + +After Step 5 completes, run notice-generate for the same `--package` to ensure +its NOTICE file reflects any dependency changes in the release: + +```bash +source .env +node .cursor/skills/notice-generate/scripts/generate-notice.js +``` + +Do NOT commit the announcement post (it is gitignored); let the user review the +remaining changes before committing. + +See `.cursor/skills/notice-generate/SKILL.md` for full details. 
+ ## CLI Parameters -| Flag | Required | Description | -| ---------------- | -------- | ------------------------------------------------------------------ | -| `--package` | Yes | Package name (e.g., `sdk`) | -| `--base-commit` | No | Initial commit SHA for migration (overrides tag lookup) | -| `--base-version` | No | Version label for base commit (display only) | -| `--release-type` | No | `minor` or `patch` (auto-detected from package.json version) | -| `--dry-run` | No | Preview output without writing files | +| Flag | Required | Description | +| ------------------------------- | -------- | ------------------------------------------------------------------ | +| `--package` | Yes | Package name (e.g., `sdk`) | +| `--base-commit` | No | Initial commit SHA for migration (overrides tag lookup) | +| `--base-version` | No | Version label for base commit (display only) | +| `--release-type` | No | `minor` or `patch` (auto-detected from package.json version) | +| `--dry-run` | No | Preview output without writing files | +| `--update-root-changelog` | No | Rebuild only the root aggregate `packages//CHANGELOG.md` | +| `--generate-announcement-post` | No | Generate `announcement-post.txt` for the package's current version | +| `--version` | No | Override version when used with `--generate-announcement-post` | ## Output @@ -79,7 +188,9 @@ Generates changelog files in `packages//changelog//`: - `breaking.md` - Breaking changes detail (if `[bc]` PRs) - `api.md` - API changes detail (if `[api]` PRs) - `models.md` - Model changes (if `[mod]` PRs) -- `CHANGELOG_LLM.md` - Human-readable version (generated separately via Step 4) +- `CHANGELOG_LLM.md` - Human-readable version (always generated, see Step 4) +- `announcement-post.txt` - Slack copy-paste post (always generated, see Step 5, + **gitignored** — never commit) Additionally: @@ -95,19 +206,6 @@ Examples: - `sdk-v0.8.1` (patch — used as base for next patch release) - `rag-v2.0.0` -### Step 5: Update NOTICE file for the 
target package - -After changelog generation completes, run notice-generate for the same `--package` to ensure its NOTICE file reflects any dependency changes in the release: - -```bash -source .env -node .cursor/skills/notice-generate/scripts/generate-notice.js -``` - -Do NOT commit — the user will review and commit. - -See `.cursor/skills/notice-generate/SKILL.md` for full details. - ## Quality Checklist Before completing: @@ -116,9 +214,10 @@ Before completing: - [ ] Base reference resolved (tag or `--base-commit`) - [ ] PRs scoped to package path only - [ ] Changelog files written to correct version directory -- [ ] If CHANGELOG_LLM.md requested, follows format guide +- [ ] CHANGELOG_LLM.md generated (mandatory) and follows format guide +- [ ] announcement-post.txt generated (mandatory, gitignored) - [ ] NOTICE file updated for the target package -- [ ] Root CHANGELOG.md rebuilt from all version folders +- [ ] Root CHANGELOG.md rebuilt from all version folders (and picks up CHANGELOG_LLM.md) - [ ] Versions sorted in descending semver order - [ ] No duplicated versions - [ ] Root file is deterministic (fully regenerated) diff --git a/.gitignore b/.gitignore index a220715d88..ad7677aca3 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,12 @@ packages/**/.npmrc packages/sdk/bun.lock NOTICE_LOG.txt NOTICE_FULL_REPORT.txt + +# Slack/Discord copy-paste announcement posts generated by the +# changelog skill (see scripts/sdk/generate-changelog-sdk-pod.cjs +# --generate-announcement-post). These are local working artifacts, +# not release deliverables — never commit them. 
+packages/*/changelog/*/announcement-post.txt .claude/worktrees # Auto-generated agent config (run /setup to regenerate) diff --git a/packages/sdk/CHANGELOG.md b/packages/sdk/CHANGELOG.md index d7eb342a25..b7ad3bc6a4 100644 --- a/packages/sdk/CHANGELOG.md +++ b/packages/sdk/CHANGELOG.md @@ -1,5 +1,449 @@ # Changelog +## [0.10.0] + +📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.10.0 + +This release lands a redesigned completion API built on a unified event stream, a generic +companion-set system that handles multi-file models in parallel, and a much stronger model +type/capability system that catches mis-routed calls at compile time. It also rewires +delegated inference to direct DHT connections, expands the addon surface (img2img, +structured output, dynamic tools, tool dialects, per-segment whisper metadata, +sentence-streaming TTS), and reshapes the model registry around companion sets. + +## Breaking Changes + +### Unified `CompletionEvent` stream + +`completion()` now returns a `CompletionRun` with a single canonical `events` stream that +carries content, thinking, tool calls, stats, and completion as one ordered, sequenced +series of events. The legacy `tokenStream`/`stats` fields still work as derived views, but the +event stream is the authoritative API going forward and is what enables features like +captured thinking and structured tool framing. + +**Before:** + +```typescript +const result = completion({ modelId, history, stream: true }); +for await (const token of result.tokenStream) { /* ... 
*/ } +const stats = await result.stats; +``` + +**After:** + +```typescript +const run = completion({ modelId, history, stream: true, captureThinking: true }); +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); + if (event.type === "toolCall") console.log(event.call.name); +} +const result = await run.final; +// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText +``` + +### Model type & capability system overhaul + +`LoadModelOptions` is no longer a single catch-all. Custom plugins must use the new +`LoadCustomPluginModelOptions<"plugin-name">` generic so the literal plugin string is +pinned at the type level. Built-in model types continue to pick the right overload +automatically when the annotation is dropped. + +At runtime, built-in SDK operations now throw `MODEL_OPERATION_NOT_SUPPORTED` when called +against the wrong model type — with a message that lists the requested operation, the +loaded model's type, and the supported operations on it. The lower-level `pluginInvoke` +and `pluginInvokeStream` paths still surface `PLUGIN_HANDLER_NOT_FOUND` as before. + +`translate(...)` now routes by the loaded model's registered type. Passing a mismatched +`modelType` throws `ModelTypeMismatchError` instead of silently mis-routing the call. + +**Before:** + +```typescript +import type { LoadModelOptions } from "@qvac/sdk"; + +const opts: LoadModelOptions = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +``` + +**After:** + +```typescript +import type { LoadCustomPluginModelOptions } from "@qvac/sdk"; + +const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +// Or just drop the annotation — TS picks the right overload. 
+``` + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) { + // Includes requested operation, loaded model type, supported operations, + // and suggested model types. + } +} +``` + +### Companion-set download progress field + +Multi-file model downloads (ONNX, future formats) now report progress through a generic +`fileSetInfo` field instead of the ONNX-specific `onnxInfo`. The shape is identical; only +the field name changed. + +**Before:** + +```typescript +onProgress: (progress) => { + if (progress.onnxInfo) { + console.log(`[${progress.onnxInfo.currentFile}] ${progress.onnxInfo.overallPercentage.toFixed(1)}%`); + } +} +``` + +**After:** + +```typescript +onProgress: (progress) => { + if (progress.fileSetInfo) { + console.log(`[${progress.fileSetInfo.currentFile}] ${progress.fileSetInfo.overallPercentage.toFixed(1)}%`); + } +} +``` + +### Delegated inference uses direct DHT connect + +Delegated inference no longer relies on a rendezvous over a shared topic. Consumers connect directly to a +provider's public key via `swarm.dht.connect(publicKey)`, and providers bind the DHT +server with `swarm.listen()` instead of announcing a topic. This removes a class of +discovery-flake failures and shortens connect time. Callers using the high-level +delegation API see no surface change; integrators driving Hyperswarm directly should +update their join/listen logic. + +### Plugin constructor migration + +SDK plugins (`definePlugin`) now use the new addon constructor shape. Plugin authors +need to migrate their `createModel` implementation to match — the SDK in this release +ships with all first-party plugins already migrated. 
+ +## New APIs and Capabilities + +### `getLoadedModelInfo` for runtime introspection + +A new `getLoadedModelInfo` API returns metadata for a loaded `modelId`, discriminated on +`isDelegated`. Local models expose their authoritative handler list and `modelType`; +delegated models defer to the provider. Useful for preflighting a built-in SDK call +before issuing the RPC. + +```typescript +import { getLoadedModelInfo, transcribe } from "@qvac/sdk"; + +const info = await getLoadedModelInfo({ modelId }); + +if (info.isDelegated || info.handlers.includes("transcribeStream")) { + await transcribe({ modelId /* ... */ }); +} +``` + +### Structured output (`responseFormat`) + +`completion()` now accepts a `responseFormat` option that constrains the model to emit +schema-valid JSON. The output is guaranteed to parse against the supplied JSON Schema. + +```typescript +const run = completion({ + modelId, + history: [{ role: "user", content: "Extract: I'm Alice, 30, data engineer." }], + stream: true, + responseFormat: { + type: "json_schema", + json_schema: { + name: "Person", + schema: { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "integer" }, + occupation: { type: "string" }, + }, + required: ["name", "age", "occupation"], + additionalProperties: false, + }, + }, + }, +}); + +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); +} +const final = await run.final; +JSON.parse(final.contentText); // schema-valid +``` + +### Dynamic tools mode + +LLM models can now opt into a `dynamic` tools mode at load time. Subsequent +`completion()` calls can pass an entirely different `tools` array on each turn, and the +addon trims the previous tool block from the KV cache so rotation is free — no need to +invalidate the cache or pin the tool set per-session. 
+ +```typescript +import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk"; + +const modelId = await loadModel({ + modelSrc: QWEN3_1_7B_INST_Q4, + modelType: "llm", + modelConfig: { + ctx_size: 4096, + tools: true, + toolsMode: TOOLS_MODE.dynamic, + }, +}); + +// Turn 1 — weather tools. +const turn1 = completion({ + modelId, history, kvCache, stream: true, + tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }], +}); + +// Turn 2 — same kvCache, different tools. Free rotation. +const turn2 = completion({ + modelId, history, kvCache, stream: true, + tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }], +}); +``` + +### Tool-call dialect routing + +Tool-call parsing is now dialect-aware. The SDK auto-detects between `hermes`, +`pythonic`, and `json` framings, and a new `toolDialect` parameter lets you force a +specific parser when auto-detection picks the wrong path — common for Llama 3.x +fine-tunes that emit native pythonic headers, which the auto-router defaults to `hermes` +for empirical reasons. + +```typescript +import { completion, type ToolDialect } from "@qvac/sdk"; + +const result = completion({ + modelId, history, tools, stream: true, + toolDialect: "pythonic", // "hermes" | "pythonic" | "json" +}); +``` + +### img2img for diffusion models + +The diffusion API now accepts an `init_image` for SDEdit-style image-to-image on +SD/SDXL, and in-context conditioning on FLUX.2. `strength` controls how much of the +source is preserved on SD/SDXL; FLUX.2 ignores it (the path is purely conditional). 
+ +```typescript +const initImage = new Uint8Array(fs.readFileSync("input.png")); +const { outputs } = diffusion({ + modelId, + prompt: "oil painting style, vibrant colors", + init_image: initImage, + strength: 0.5, // 0 = keep source, 1 = ignore source +}); +``` + +### Sentence-level TTS streaming + +ONNX text-to-speech can now stream output one sentence at a time, either as a +self-contained `textToSpeech({ stream: true, sentenceStream: true })` call or via a +duplex `textToSpeechStream` session that you can pipe a streaming LLM into. Each chunk +exposes the int16 PCM samples plus the source sentence and chunk index. + +```typescript +const session = await textToSpeechStream({ + modelId: ttsModelId, + inputType: "text", + accumulateSentences: true, + sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual" + flushAfterMs: 400, +}); + +(async () => { + for await (const delta of completion({ modelId: llmModelId /* ... */ }).tokenStream) { + session.write(delta); + } + session.end(); +})(); + +for await (const chunk of session) { + // chunk.buffer / chunk.chunkIndex / chunk.sentenceChunk + if (chunk.done) break; +} +``` + +### Per-segment whisper metadata + +Both `transcribe` (batch) and `transcribeStream` (duplex) now return structured +`TranscribeSegment` objects with start/end timestamps, segment IDs, and an `append` +flag — enabling proper subtitle generation and timeline alignment instead of raw text +concatenation. + +```typescript +const segments = await transcribe({ modelId, audioChunk: audioFilePath, metadata: true }); +for (const s of segments) { + console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`); +} +``` + +### Suspend lifecycle gate and `state()` + +`suspend()` is now serialized through a lifecycle gate that prevents overlapping +suspend/resume races. A new `state()` API reports the current lifecycle phase: +`active`, `suspending`, `suspended`, or `resuming`. 
+ +```typescript +import { state, suspend, resume, type LifecycleState } from "@qvac/sdk"; + +await suspend(); +const current: LifecycleState = await state(); +if (current !== "active") { + await resume(); +} +``` + +### Registry download retries and configurable stream timeout + +Two new SDK config knobs cover slow/unstable links: `registryDownloadMaxRetries` retries +`REQUEST_TIMEOUT` failures (set to `0` to disable), and `registryStreamTimeoutMs` +extends the per-block stream timeout beyond the default 60s. + +```typescript +import { setSDKConfig } from "@qvac/sdk"; + +setSDKConfig({ + registryDownloadMaxRetries: 5, + registryStreamTimeoutMs: 180_000, +}); +``` + +### Auto KV-cache: replay the canonical assistant turn + +When auto KV-cache is enabled, the completion result now exposes +`final.cacheableAssistantContent` — the exact assistant string the SDK persisted to the +cache key on this turn. Push it back into `history` verbatim on the next turn to +guarantee a cache hit. Tool-call turns aren't auto-cached today and omit the field; +fall back to `final.contentText` in that case. + +```typescript +const run = completion({ modelId, history, kvCache: true }); +for await (const _ of run.tokenStream) { /* stream */ } +const final = await run.final; +const nextHistory = [ + ...history, + { role: "assistant", content: final.cacheableAssistantContent ?? final.contentText }, + { role: "user", content: "follow-up question" }, +]; +``` + +### LLM-addon cache API plumbed through SDK + +The SDK now wires through the LLM addon's first-class cache API — including explicit +`deleteCache({ kvCacheKey })` for evicting a named cache key — so consumers can manage +KV-cache lifetimes alongside `loadModel`/`unloadModel`. + +### NMTcpp 2.0.1 surface + +The SDK NMT plugin now targets `@qvac/translation-nmtcpp 2.0.1` with a structured +constructor that distinguishes primary and pivot model files, vocab files, and pivot +config (beam size, top-k). 
Bergamot models are also picked up via path-based vocab +resolution and grouped into companion sets, which lets the cache and download paths +treat them like any other multi-file model. + +## Features + +### Parallel orchestration and download dedupe + +Model loading is now genuinely parallel where it can be: the primary model and any +companion files (vision projection, vocab, etc.) download concurrently, and concurrent +requests for the same asset are deduplicated to a single transfer. Cancellation cleans +up all active transfers atomically with no leaked state. Profiling fields +(`sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`, +`modelInitializationTime`, `checksumValidationTime`) are populated correctly across +both primary and companion downloads, with aggregate stats merged at the run level. + +The companion pipeline is also generic: `companions.ts` is the only format-aware piece, +and adding a new multi-file format is a matter of dropping in a detection function and +registering it with `groupCompanionSets`. Everything downstream — codegen, resolver, +cache probing, storage cleanup — handles it automatically. + +### Real-time voice assistant example + +A new end-to-end example demonstrates a real-time voice assistant pipeline (whisper → +LLM → TTS) wired together using the SDK's streaming primitives. + +## Bug Fixes + +- RPC initialization in the Node runtime now has an explicit timeout, so a wedged + transport can no longer hang `loadModel`/`unloadModel` indefinitely. +- The registry client now opens its corestore with `wait: true`, eliminating a startup + race where downloads could begin before replication was ready. +- KV-cache `savedCount` is no longer incremented on cancelled or zero-token turns, + preventing inflated cache stats. +- `delete-cache` RPC now scopes invalidation to the deleted key only instead of wiping + unrelated entries. 
+- Delegated transports strip the `__profiling` envelope before zod validation, fixing a + spurious validation error when profiling is enabled on the consumer side. +- Replaced `z.xor` with `z.union` and bumped the zod floor to `^4.3.0` to track upstream + breaking changes. +- LLM-based translation now uses deterministic decoding so the same input produces the + same output across runs. +- Inflight delegation requests that get rejected now run their cleanup chain to + completion instead of leaking pending promises. + +## Model Registry Changes + +The model registry was regenerated around companion-set metadata. The user-facing surface +is leaner: families that used to live as separate `*_DATA`, `*_LEX`, `*_VOCAB`, and +`METADATA_*` constants are now companion-only — they're still downloaded, but they're +not addressable as standalone model sources. Marian Opus models were renamed under the +`NMT_*` namespace to match the rest of the NMT family. + +### Added + +``` +NMT_Q0F16 through NMT_Q0F16_9 (10 entries) +NMT_Q4_0 through NMT_Q4_0_12+ (22 entries) +``` + +### Removed (now companion-only or renamed) + +``` +*_DATA (32 entries — companion-only, e.g. PARAKEET_TDT_ENCODER_DATA_FP32, TTS_*_DATA) +BERGAMOT_*_LEX (93 entries — companion-only) +BERGAMOT_*_VOCAB (93 entries — companion-only) +BERGAMOT_METADATA_* (87 entries — companion-only) +MARIAN_OPUS_* (32 entries — renamed to NMT_*) +``` + +## Documentation, Tests, and Infrastructure + +- Diffusion documentation was extended to cover the new img2img flows (SDEdit on + SD/SDXL, in-context conditioning on FLUX.2). +- Android sharded-model-resume tests no longer trip Scudo OOM — the test harness now + bounds memory more conservatively on long-running resume scenarios. +- The tests-qvac docs, tooling, and CI workflow job names were refreshed for the new + suite filtering and PR-triggered e2e workflows. 
Suite filtering plus PR-trigger labels + let CI run targeted SDK e2e subsets on demand instead of always running the full grid. +- A pre-terminate cleanup hook stabilises mobile smoke: the mobile auto-close path now + awaits worker cleanup acknowledgement before terminating the worklet. +- `DataLoader` cleanup logic was scoped down to `packages/rag` so the SDK no longer + carries that surface. + ## [0.9.1] 📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.9.1 diff --git a/packages/sdk/NOTICE b/packages/sdk/NOTICE index 7d2b63cb90..cc94473e7c 100644 --- a/packages/sdk/NOTICE +++ b/packages/sdk/NOTICE @@ -328,7 +328,7 @@ Third-Party Model Licenses model.enis.intgemm.alphas https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enis model.enit.intgemm.alphas - https://github.com/mozilla/firefox-translations-models/tree/main/models/tiny/enit + https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enit model.enja.intgemm.alphas https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enja model.enkn.intgemm.alphas @@ -372,7 +372,7 @@ Third-Party Model Licenses model.enzh.intgemm.alphas https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh model.esen.intgemm.alphas - https://github.com/mozilla/firefox-translations-models/tree/main/models/tiny/esen + https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/esen model.eten.intgemm.alphas https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/eten model.faen.intgemm.alphas @@ -489,19 +489,26 @@ JavaScript Dependencies @hyperswarm/secret-stream@6.9.1 https://github.com/holepunchto/hyperswarm-secret-stream - @qvac/decoder-audio@0.3.8 + @qvac/decoder-audio@0.3.6 + https://github.com/tetherto/qvac + @qvac/decoder-audio@0.3.7 https://github.com/tetherto/qvac @qvac/diagnostics@0.1.1 @qvac/diffusion-cpp@0.3.0 https://github.com/tetherto/qvac + 
@qvac/dl-base@0.1.1 + @qvac/dl-hyperdrive@0.1.1 @qvac/embed-llamacpp@0.14.0 https://github.com/tetherto/qvac @qvac/error@0.1.1 + @qvac/infer-base@0.1.1 + @qvac/infer-base@0.4.0 + https://github.com/tetherto/qvac @qvac/infer-base@0.4.1 https://github.com/tetherto/qvac @qvac/langdetect-text@0.1.2 https://github.com/tetherto/qvac - @qvac/llm-llamacpp@0.16.0 + @qvac/llm-llamacpp@0.17.3 https://github.com/tetherto/qvac @qvac/logging@0.1.0 @qvac/ocr-onnx@0.4.2 @@ -514,9 +521,9 @@ JavaScript Dependencies https://github.com/tetherto/qvac @qvac/registry-schema@0.1.2 @qvac/response@0.1.2 - @qvac/transcription-parakeet@0.3.2 + @qvac/transcription-parakeet@0.3.1 https://github.com/tetherto/qvac - @qvac/transcription-whispercpp@0.6.4 + @qvac/transcription-whispercpp@0.6.1 https://github.com/tetherto/qvac @qvac/translation-nmtcpp@2.1.0 https://github.com/tetherto/qvac @@ -552,19 +559,19 @@ JavaScript Dependencies https://github.com/holepunchto/bare-events bare-events@2.8.2 https://github.com/holepunchto/bare-events - bare-fetch@2.9.0 + bare-fetch@2.8.1 https://github.com/holepunchto/bare-fetch bare-ffmpeg@1.2.2 https://github.com/holepunchto/bare-ffmpeg bare-form-data@1.2.1 https://github.com/holepunchto/bare-form-data - bare-fs@4.7.1 + bare-fs@4.6.0 https://github.com/holepunchto/bare-fs bare-hrtime@2.1.1 https://github.com/holepunchto/bare-hrtime bare-http-parser@1.1.3 https://github.com/holepunchto/bare-http-parser - bare-http1@4.5.6 + bare-http1@4.5.5 https://github.com/holepunchto/bare-http1 bare-https@2.1.3 https://github.com/holepunchto/bare-https @@ -574,9 +581,7 @@ JavaScript Dependencies https://github.com/holepunchto/bare-lief bare-link@3.2.1 https://github.com/holepunchto/bare-link - bare-mime@1.0.0 - https://github.com/holepunchto/bare-mime - bare-module@6.2.0 + bare-module@6.1.3 https://github.com/holepunchto/bare-module bare-module-lexer@1.4.7 https://github.com/holepunchto/bare-module-lexer @@ -590,7 +595,7 @@ JavaScript Dependencies 
https://github.com/holepunchto/bare-node bare-node-worker-threads@1.0.0 https://github.com/holepunchto/bare-node - bare-os@3.9.0 + bare-os@3.8.7 https://github.com/holepunchto/bare-os bare-path@3.0.0 https://github.com/holepunchto/bare-path @@ -600,39 +605,39 @@ JavaScript Dependencies https://github.com/holepunchto/bare-process bare-rpc@1.2.0 https://github.com/holepunchto/bare-rpc - bare-runtime@1.28.4 + bare-runtime@1.28.1 https://github.com/holepunchto/bare-runtime - bare-runtime-darwin-arm64@1.28.4 + bare-runtime-darwin-arm64@1.28.1 https://github.com/holepunchto/bare-runtime - bare-semver@1.0.3 + bare-semver@1.0.2 https://github.com/holepunchto/bare-semver bare-signals@4.2.0 https://github.com/holepunchto/bare-signals bare-stdio@1.0.2 https://github.com/holepunchto/bare-stdio - bare-stream@2.13.1 + bare-stream@2.12.0 https://github.com/holepunchto/bare-stream - bare-structured-clone@1.5.4 + bare-structured-clone@1.5.3 https://github.com/holepunchto/bare-structured-clone bare-subprocess@5.2.3 https://github.com/holepunchto/bare-subprocess - bare-tcp@2.2.12 + bare-tcp@2.2.7 https://github.com/holepunchto/bare-tcp bare-thread@1.2.0 https://github.com/holepunchto/bare-thread - bare-tls@2.2.3 + bare-tls@2.2.1 https://github.com/holepunchto/bare-tls bare-tty@5.1.0 https://github.com/holepunchto/bare-tty bare-type@1.1.0 https://github.com/holepunchto/bare-type - bare-url@2.4.2 + bare-url@2.4.0 https://github.com/holepunchto/bare-url bare-worker@4.1.6 https://github.com/holepunchto/bare-worker - bare-zlib@1.3.3 + bare-zlib@1.3.1 https://github.com/holepunchto/bare-zlib - blind-relay@1.5.0 + blind-relay@1.4.0 https://github.com/holepunchto/blind-relay compact-encoding@2.19.2 https://github.com/holepunchto/compact-encoding @@ -642,7 +647,7 @@ JavaScript Dependencies https://github.com/holepunchto/events-universal fd-lock@2.1.1 https://github.com/holepunchto/fd-lock - fs-native-extensions@1.5.0 + fs-native-extensions@1.4.5 
https://github.com/holepunchto/fs-native-extensions hyperblobs@2.11.1 https://github.com/holepunchto/hyperblobs @@ -656,8 +661,6 @@ JavaScript Dependencies https://github.com/holepunchto/hypercore-storage hyperdb@4.22.3 https://github.com/holepunchto/hyperdb - hyperdht-address@1.0.1 - https://github.com/holepunchto/hyperdht-address hyperdht-stats@1.10.0 https://github.com/holepunchto/hyperdht-stats hyperdispatch@1.5.1 @@ -670,7 +673,7 @@ JavaScript Dependencies https://github.com/holepunchto/hyperswarm-stats index-encoder@3.5.0 https://github.com/holepunchto/index-encoder - mirror-drive@1.14.2 + mirror-drive@1.14.1 https://github.com/holepunchto/mirror-drive noise-handshake@4.2.0 https://github.com/holepunchto/noise-handshake @@ -742,8 +745,8 @@ JavaScript Dependencies https://github.com/holepunchto/corestore debounceify@1.1.0 https://github.com/mafintosh/debounceify - dht-rpc@6.26.4 - https://github.com/holepunchto/dht-rpc + dht-rpc@6.26.3 + https://github.com/mafintosh/dht-rpc events@3.3.0 https://github.com/Gozala/events fast-fifo@1.3.2 @@ -762,7 +765,7 @@ JavaScript Dependencies https://github.com/holepunchto/hypercore hypercore-crypto@3.6.1 https://github.com/mafintosh/hypercore-crypto - hyperdht@6.30.0 + hyperdht@6.29.6 https://github.com/holepunchto/hyperdht hyperswarm@4.17.0 https://github.com/holepunchto/hyperswarm @@ -780,8 +783,8 @@ JavaScript Dependencies https://github.com/mafintosh/nat-sampler protocol-buffers-encodings@1.2.0 https://github.com/mafintosh/protocol-buffers-encodings - protomux@3.10.3 - https://github.com/holepunchto/protomux + protomux@3.10.1 + https://github.com/mafintosh/protomux queue-tick@1.0.1 https://github.com/mafintosh/queue-tick random-array-iterator@1.0.0 @@ -816,6 +819,8 @@ JavaScript Dependencies https://github.com/mafintosh/tar-stream teex@1.0.1 https://github.com/mafintosh/teex + test-tmp@1.4.0 + https://github.com/mafintosh/test-tmp time-ordered-set@2.0.1 https://github.com/mafintosh/time-ordered-set timeout-refresh@2.0.1 
diff --git a/packages/sdk/changelog/0.10.0/CHANGELOG.md b/packages/sdk/changelog/0.10.0/CHANGELOG.md new file mode 100644 index 0000000000..6b2304420e --- /dev/null +++ b/packages/sdk/changelog/0.10.0/CHANGELOG.md @@ -0,0 +1,63 @@ +# Changelog v0.10.0 + +Release Date: 2026-05-01 + +## ✨ Features + +- Add real-time voice assistant example. (see PR [#1631](https://github.com/tetherto/qvac/pull/1631)) +- Add parallel orchestration, download dedupe, and generic companion-set support. (see PR [#1636](https://github.com/tetherto/qvac/pull/1636)) - See [breaking changes](./breaking.md) +- Unified CompletionEvent stream as canonical completion API. (see PR [#1673](https://github.com/tetherto/qvac/pull/1673)) - See [breaking changes](./breaking.md) +- Add Bergamot NMT companion-set grouping and path-based vocab resolution. (see PR [#1707](https://github.com/tetherto/qvac/pull/1707)) +- Switch delegation to direct DHT connect, drop topic end-to-end. (see PR [#1729](https://github.com/tetherto/qvac/pull/1729)) - See [breaking changes](./breaking.md) + +## 🔌 API + +- Update SDK nmtcpp plugin for @qvac/translation-nmtcpp 2.0.1. (see PR [#1563](https://github.com/tetherto/qvac/pull/1563)) - See [API changes](./api.md) +- Add sentence-level streaming for onnx text-to-speech. (see PR [#1590](https://github.com/tetherto/qvac/pull/1590)) - See [API changes](./api.md) +- Support the new llm addon cache api in sdk. (see PR [#1633](https://github.com/tetherto/qvac/pull/1633)) - See [API changes](./api.md) +- Add img2img support to SDK diffusion API. (see PR [#1662](https://github.com/tetherto/qvac/pull/1662)) - See [API changes](./api.md) +- Harden suspend with lifecycle gate and add state() api. (see PR [#1691](https://github.com/tetherto/qvac/pull/1691)) - See [API changes](./api.md) +- Propagate whisper per-segment metadata to SDK users. (see PR [#1701](https://github.com/tetherto/qvac/pull/1701)) - See [API changes](./api.md) +- Make auto KV-cache reuse completed turn history. 
(see PR [#1705](https://github.com/tetherto/qvac/pull/1705)) - See [API changes](./api.md) +- Propagate registry download retries and expose stream timeout. (see PR [#1743](https://github.com/tetherto/qvac/pull/1743)) - See [API changes](./api.md) +- Improve model type & capability system. (see PR [#1748](https://github.com/tetherto/qvac/pull/1748)) - See [breaking changes](./breaking.md), [API changes](./api.md) +- Add responseFormat for structured output. (see PR [#1768](https://github.com/tetherto/qvac/pull/1768)) - See [API changes](./api.md) +- Sdk "dynamic" tools mode. (see PR [#1779](https://github.com/tetherto/qvac/pull/1779)) - See [API changes](./api.md) +- Pre-terminate cleanup hook + stabilise mobile smoke. (see PR [#1797](https://github.com/tetherto/qvac/pull/1797)) - See [API changes](./api.md) +- Add native tool-call dialect routing (hermes, pythonic, json) with override. (see PR [#1802](https://github.com/tetherto/qvac/pull/1802)) - See [API changes](./api.md) + +## 🐞 Fixes + +- Add timeout to RPC initialization in Node runtime. (see PR [#1550](https://github.com/tetherto/qvac/pull/1550)) +- Enable corestoreOpts: { wait: true } for registry client. (see PR [#1699](https://github.com/tetherto/qvac/pull/1699)) +- Skip kv-cache savedCount on cancelled or zero-token turns. (see PR [#1737](https://github.com/tetherto/qvac/pull/1737)) +- Scope kv-cache invalidation to deleted key on RPC delete-cache. (see PR [#1740](https://github.com/tetherto/qvac/pull/1740)) +- Strip __profiling envelope in delegate transport before zod validation. (see PR [#1767](https://github.com/tetherto/qvac/pull/1767)) +- Replace z.xor with z.union, bump zod floor to ^4.3.0. (see PR [#1790](https://github.com/tetherto/qvac/pull/1790)) +- Deterministic decoding for LLM translate. (see PR [#1808](https://github.com/tetherto/qvac/pull/1808)) +- Handle inflight delegation rejection cleanup chain. 
(see PR [#1811](https://github.com/tetherto/qvac/pull/1811)) + +## 📦 Models + +- Regenerate model registry with companion-set metadata. (see PR [#1700](https://github.com/tetherto/qvac/pull/1700)) - See [model changes](./models.md) + Added: NMT_Q0F16 through NMT_Q0F16_9, NMT_Q4_0 through NMT_Q4_0_12+ + Removed: MARIAN_OPUS_* + +## 📘 Docs + +- Content update - SDK - diffusion - add img2img gen. (see PR [#1796](https://github.com/tetherto/qvac/pull/1796)) + +## 🧪 Tests + +- Fix android sharded-model-resume scudo oom. (see PR [#1831](https://github.com/tetherto/qvac/pull/1831)) + +## 🧹 Chores + +- Migrate SDK plugins to new addon constructor shape. (see PR [#1688](https://github.com/tetherto/qvac/pull/1688)) - See [breaking changes](./breaking.md) +- Refresh tests-qvac docs, tooling, and workflow job names. (see PR [#1712](https://github.com/tetherto/qvac/pull/1712)) +- Scope down DataLoader cleanup to packages/rag. (see PR [#1754](https://github.com/tetherto/qvac/pull/1754)) + +## ⚙️ Infrastructure + +- Add suite filtering and PR-triggered e2e test workflows for SDK. (see PR [#1653](https://github.com/tetherto/qvac/pull/1653)) + diff --git a/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md b/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md new file mode 100644 index 0000000000..6dcc30a493 --- /dev/null +++ b/packages/sdk/changelog/0.10.0/CHANGELOG_LLM.md @@ -0,0 +1,443 @@ +# QVAC SDK v0.10.0 Release Notes + +📦 **NPM:** https://www.npmjs.com/package/@qvac/sdk/v/0.10.0 + +This release lands a redesigned completion API built on a unified event stream, a generic +companion-set system that handles multi-file models in parallel, and a much stronger model +type/capability system that catches mis-routed calls at compile time. 
It also rewires +delegated inference to direct DHT connections, expands the addon surface (img2img, +structured output, dynamic tools, tool dialects, per-segment whisper metadata, +sentence-streaming TTS), and reshapes the model registry around companion sets. + +## Breaking Changes + +### Unified `CompletionEvent` stream + +`completion()` now returns a `CompletionRun` with a single canonical `events` stream that +carries content, thinking, tool calls, stats, and completion in one ordered, sequenced +series. The legacy `tokenStream`/`stats` fields still work as derived views, but the +event stream is the authoritative API going forward and is what enables features like +captured thinking and structured tool framing. + +**Before:** + +```typescript +const result = completion({ modelId, history, stream: true }); +for await (const token of result.tokenStream) { /* ... */ } +const stats = await result.stats; +``` + +**After:** + +```typescript +const run = completion({ modelId, history, stream: true, captureThinking: true }); +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); + if (event.type === "toolCall") console.log(event.call.name); +} +const result = await run.final; +// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText +``` + +### Model type & capability system overhaul + +`LoadModelOptions` is no longer a single catch-all. Custom plugins must use the new +`LoadCustomPluginModelOptions<"plugin-name">` generic so the literal plugin string is +pinned at the type level. Built-in model types continue to pick the right overload +automatically when the annotation is dropped. + +At runtime, built-in SDK operations now throw `MODEL_OPERATION_NOT_SUPPORTED` when called +against the wrong model type — with a message that lists the requested operation, the +loaded model's type, and the supported operations on it.
The lower-level `pluginInvoke` +and `pluginInvokeStream` paths still surface `PLUGIN_HANDLER_NOT_FOUND` as before. + +`translate(...)` now routes by the loaded model's registered type. Passing a mismatched +`modelType` throws `ModelTypeMismatchError` instead of silently mis-routing the call. + +**Before:** + +```typescript +import type { LoadModelOptions } from "@qvac/sdk"; + +const opts: LoadModelOptions = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +``` + +**After:** + +```typescript +import type { LoadCustomPluginModelOptions } from "@qvac/sdk"; + +const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +// Or just drop the annotation — TS picks the right overload. +``` + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) { + // Includes requested operation, loaded model type, supported operations, + // and suggested model types. + } +} +``` + +### Companion-set download progress field + +Multi-file model downloads (ONNX, future formats) now report progress through a generic +`fileSetInfo` field instead of the ONNX-specific `onnxInfo`. The shape is identical, only +the field name changed. 
+ +**Before:** + +```typescript +onProgress: (progress) => { + if (progress.onnxInfo) { + console.log(`[${progress.onnxInfo.currentFile}] ${progress.onnxInfo.overallPercentage.toFixed(1)}%`); + } +} +``` + +**After:** + +```typescript +onProgress: (progress) => { + if (progress.fileSetInfo) { + console.log(`[${progress.fileSetInfo.currentFile}] ${progress.fileSetInfo.overallPercentage.toFixed(1)}%`); + } +} +``` + +### Delegated inference uses direct DHT connect + +Delegation no longer rendezvous over a shared topic. Consumers connect directly to a +provider's public key via `swarm.dht.connect(publicKey)`, and providers bind the DHT +server with `swarm.listen()` instead of announcing a topic. This removes a class of +discovery-flake failures and shortens connect time. Callers using the high-level +delegation API see no surface change; integrators driving Hyperswarm directly should +update their join/listen logic. + +### Plugin constructor migration + +SDK plugins (`definePlugin`) now use the new addon constructor shape. Plugin authors +need to migrate their `createModel` implementation to match — the SDK in this release +ships with all first-party plugins already migrated. + +## New APIs and Capabilities + +### `getLoadedModelInfo` for runtime introspection + +A new `getLoadedModelInfo` API returns metadata for a loaded `modelId`, discriminated on +`isDelegated`. Local models expose their authoritative handler list and `modelType`; +delegated models defer to the provider. Useful for preflighting a built-in SDK call +before issuing the RPC. + +```typescript +import { getLoadedModelInfo, transcribe } from "@qvac/sdk"; + +const info = await getLoadedModelInfo({ modelId }); + +if (info.isDelegated || info.handlers.includes("transcribeStream")) { + await transcribe({ modelId /* ... */ }); +} +``` + +### Structured output (`responseFormat`) + +`completion()` now accepts a `responseFormat` option that constrains the model to emit +schema-valid JSON. 
The output is guaranteed to parse against the supplied JSON Schema. + +```typescript +const run = completion({ + modelId, + history: [{ role: "user", content: "Extract: I'm Alice, 30, data engineer." }], + stream: true, + responseFormat: { + type: "json_schema", + json_schema: { + name: "Person", + schema: { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "integer" }, + occupation: { type: "string" }, + }, + required: ["name", "age", "occupation"], + additionalProperties: false, + }, + }, + }, +}); + +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); +} +const final = await run.final; +JSON.parse(final.contentText); // schema-valid +``` + +### Dynamic tools mode + +LLM models can now opt into a `dynamic` tools mode at load time. Subsequent +`completion()` calls can pass an entirely different `tools` array on each turn, and the +addon trims the previous tool block from the KV cache so rotation is free — no need to +invalidate the cache or pin the tool set per-session. + +```typescript +import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk"; + +const modelId = await loadModel({ + modelSrc: QWEN3_1_7B_INST_Q4, + modelType: "llm", + modelConfig: { + ctx_size: 4096, + tools: true, + toolsMode: TOOLS_MODE.dynamic, + }, +}); + +// Turn 1 — weather tools. +const turn1 = completion({ + modelId, history, kvCache, stream: true, + tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }], +}); + +// Turn 2 — same kvCache, different tools. Free rotation. +const turn2 = completion({ + modelId, history, kvCache, stream: true, + tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }], +}); +``` + +### Tool-call dialect routing + +Tool-call parsing is now dialect-aware. 
The SDK auto-detects between `hermes`, +`pythonic`, and `json` framings, and a new `toolDialect` parameter lets you force a +specific parser when auto-detection picks the wrong path — common for Llama 3.x +fine-tunes that emit native pythonic headers, for which the auto-router defaults to +`hermes` for empirical reasons. + +```typescript +import { completion, type ToolDialect } from "@qvac/sdk"; + +const result = completion({ + modelId, history, tools, stream: true, + toolDialect: "pythonic", // "hermes" | "pythonic" | "json" +}); +``` + +### img2img for diffusion models + +The diffusion API now accepts an `init_image` for SDEdit-style image-to-image on +SD/SDXL, and in-context conditioning on FLUX.2. `strength` controls how much of the +source is preserved on SD/SDXL; FLUX.2 ignores it (the path is purely conditional). + +```typescript +const initImage = new Uint8Array(fs.readFileSync("input.png")); +const { outputs } = diffusion({ + modelId, + prompt: "oil painting style, vibrant colors", + init_image: initImage, + strength: 0.5, // 0 = keep source, 1 = ignore source +}); +``` + +### Sentence-level TTS streaming + +ONNX text-to-speech can now stream output one sentence at a time, either as a +self-contained `textToSpeech({ stream: true, sentenceStream: true })` call or via a +duplex `textToSpeechStream` session that you can pipe a streaming LLM into. Each chunk +exposes the int16 PCM samples plus the source sentence and chunk index. + +```typescript +const session = await textToSpeechStream({ + modelId: ttsModelId, + inputType: "text", + accumulateSentences: true, + sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual" + flushAfterMs: 400, +}); + +(async () => { + for await (const delta of completion({ modelId: llmModelId /* ...
*/ }).tokenStream) { + session.write(delta); + } + session.end(); +})(); + +for await (const chunk of session) { + // chunk.buffer / chunk.chunkIndex / chunk.sentenceChunk + if (chunk.done) break; +} +``` + +### Per-segment whisper metadata + +Both `transcribe` (batch) and `transcribeStream` (duplex) now return structured +`TranscribeSegment` objects with start/end timestamps, segment IDs, and an `append` +flag — enabling proper subtitle generation and timeline alignment instead of raw text +concatenation. + +```typescript +const segments = await transcribe({ modelId, audioChunk: audioFilePath, metadata: true }); +for (const s of segments) { + console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`); +} +``` + +### Suspend lifecycle gate and `state()` + +`suspend()` is now serialized through a lifecycle gate that prevents overlapping +suspend/resume races. A new `state()` API reports the current lifecycle phase: +`active`, `suspending`, `suspended`, or `resuming`. + +```typescript +import { state, suspend, resume, type LifecycleState } from "@qvac/sdk"; + +await suspend(); +const current: LifecycleState = await state(); +if (current !== "active") { + await resume(); +} +``` + +### Registry download retries and configurable stream timeout + +Two new SDK config knobs cover slow/unstable links: `registryDownloadMaxRetries` retries +`REQUEST_TIMEOUT` failures (set to `0` to disable), and `registryStreamTimeoutMs` +extends the per-block stream timeout beyond the default 60s. + +```typescript +import { setSDKConfig } from "@qvac/sdk"; + +setSDKConfig({ + registryDownloadMaxRetries: 5, + registryStreamTimeoutMs: 180_000, +}); +``` + +### Auto KV-cache: replay the canonical assistant turn + +When auto KV-cache is enabled, the completion result now exposes +`final.cacheableAssistantContent` — the exact assistant string the SDK persisted to the +cache key on this turn. 
Push it back into `history` verbatim on the next turn to +guarantee a cache hit. Tool-call turns aren't auto-cached today and omit the field; +fall back to `final.contentText` in that case. + +```typescript +const run = completion({ modelId, history, kvCache: true }); +for await (const _ of run.tokenStream) { /* stream */ } +const final = await run.final; +const nextHistory = [ + ...history, + { role: "assistant", content: final.cacheableAssistantContent ?? final.contentText }, + { role: "user", content: "follow-up question" }, +]; +``` + +### LLM-addon cache API plumbed through SDK + +The SDK now wires through the LLM addon's first-class cache API — including explicit +`deleteCache({ kvCacheKey })` for evicting a named cache key — so consumers can manage +KV-cache lifetimes alongside `loadModel`/`unloadModel`. + +### NMTcpp 2.0.1 surface + +The SDK NMT plugin now targets `@qvac/translation-nmtcpp 2.0.1` with a structured +constructor that distinguishes primary and pivot model files, vocab files, and pivot +config (beam size, top-k). Bergamot models are also picked up via path-based vocab +resolution and grouped into companion sets, which lets the cache and download paths +treat them like any other multi-file model. + +## Features + +### Parallel orchestration and download dedupe + +Model loading is now genuinely parallel where it can be: the primary model and any +companion files (vision projection, vocab, etc.) download concurrently, and concurrent +requests for the same asset are deduplicated to a single transfer. Cancellation cleans +up all active transfers atomically with no leaked state. Profiling fields +(`sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`, +`modelInitializationTime`, `checksumValidationTime`) are populated correctly across +both primary and companion downloads, with aggregate stats merged at the run level. 
+ +The companion pipeline is also generic: `companions.ts` is the only format-aware piece, +and adding a new multi-file format is a matter of dropping in a detection function and +registering it with `groupCompanionSets`. Everything downstream — codegen, resolver, +cache probing, storage cleanup — handles it automatically. + +### Real-time voice assistant example + +A new end-to-end example demonstrates a real-time voice assistant pipeline (whisper → +LLM → TTS) wired together using the SDK's streaming primitives. + +## Bug Fixes + +- RPC initialization in the Node runtime now has an explicit timeout, so a wedged + transport can no longer hang `loadModel`/`unloadModel` indefinitely. +- The registry client now opens its corestore with `wait: true`, eliminating a startup + race where downloads could begin before replication was ready. +- KV-cache `savedCount` is no longer incremented on cancelled or zero-token turns, + preventing inflated cache stats. +- `delete-cache` RPC now scopes invalidation to the deleted key only instead of wiping + unrelated entries. +- Delegated transports strip the `__profiling` envelope before zod validation, fixing a + spurious validation error when profiling is enabled on the consumer side. +- Replaced `z.xor` with `z.union` and bumped the zod floor to `^4.3.0` to track upstream + breaking changes. +- LLM-based translation now uses deterministic decoding so the same input produces the + same output across runs. +- Inflight delegation requests that get rejected now run their cleanup chain to + completion instead of leaking pending promises. + +## Model Registry Changes + +The model registry was regenerated around companion-set metadata. The user-facing surface +is leaner: families that used to live as separate `*_DATA`, `*_LEX`, `*_VOCAB`, and +`METADATA_*` constants are now companion-only — they're still downloaded, but they're +not addressable as standalone model sources. 
Marian Opus models were renamed under the +`NMT_*` namespace to match the rest of the NMT family. + +### Added + +``` +NMT_Q0F16 through NMT_Q0F16_9 (10 entries) +NMT_Q4_0 through NMT_Q4_0_12+ (22 entries) +``` + +### Removed (now companion-only or renamed) + +``` +*_DATA (32 entries — companion-only, e.g. PARAKEET_TDT_ENCODER_DATA_FP32, TTS_*_DATA) +BERGAMOT_*_LEX (93 entries — companion-only) +BERGAMOT_*_VOCAB (93 entries — companion-only) +BERGAMOT_METADATA_* (87 entries — companion-only) +MARIAN_OPUS_* (32 entries — renamed to NMT_*) +``` + +## Documentation, Tests, and Infrastructure + +- Diffusion documentation was extended to cover the new img2img flows (SDEdit on + SD/SDXL, in-context conditioning on FLUX.2). +- Android sharded-model-resume tests no longer trip Scudo OOM — the test harness now + bounds memory more conservatively on long-running resume scenarios. +- The tests-qvac docs, tooling, and CI workflow job names were refreshed for the new + suite filtering and PR-triggered e2e workflows. Suite filtering plus PR-trigger labels + let CI run targeted SDK e2e subsets on demand instead of always running the full grid. +- A pre-terminate cleanup hook stabilises mobile smoke: the mobile auto-close path now + awaits worker cleanup acknowledgement before terminating the worklet. +- `DataLoader` cleanup logic was scoped down to `packages/rag` so the SDK no longer + carries that surface. 
diff --git a/packages/sdk/changelog/0.10.0/api.md b/packages/sdk/changelog/0.10.0/api.md new file mode 100644 index 0000000000..69812b7c61 --- /dev/null +++ b/packages/sdk/changelog/0.10.0/api.md @@ -0,0 +1,501 @@ +# 🔌 API Changes v0.10.0 + +## Update SDK nmtcpp plugin for @qvac/translation-nmtcpp 2.0.1 + +PR: [#1563](https://github.com/tetherto/qvac/pull/1563) + +```typescript +// NMT addon constructor (2.0.1) — called by SDK plugin +new TranslationNmtcpp({ + files: { + model: '/path/to/model.bin', + srcVocab: '/path/to/vocab.spm', + dstVocab: '/path/to/vocab.spm', + pivotModel: '/path/to/pivot.bin', // optional + pivotSrcVocab: '/path/to/pivot-vocab.spm', // optional + pivotDstVocab: '/path/to/pivot-vocab.spm', // optional + }, + params: { srcLang: 'en', dstLang: 'fr' }, + config: { + modelType: TranslationNmtcpp.ModelTypes.Bergamot, + beamsize: 4, + pivotConfig: { beamsize: 4, topk: 100 }, // optional + }, + logger, + opts: { stats: true }, +}) +``` + +--- + +## Add sentence-level streaming for onnx text-to-speech + +PR: [#1590](https://github.com/tetherto/qvac/pull/1590) + +```typescript +import { loadModel, textToSpeech, unloadModel } from "@qvac/sdk"; + +const modelId = await loadModel({ /* ...Supertonic ONNX TTS config... */ }); + +const result = textToSpeech({ + modelId, + text: "Your long passage here.", + inputType: "text", + stream: true, + sentenceStream: true, + sentenceStreamLocale: "en", +}); + +for await (const chunk of result.chunkUpdates!) 
{ + // chunk.buffer -> int16 PCM samples for this sentence + // chunk.chunkIndex -> 0-based sentence index + // chunk.sentenceChunk -> source text for this chunk +} + +await result.done; +await unloadModel({ modelId }); +``` + +```typescript +import { completion, textToSpeechStream } from "@qvac/sdk"; + +const session = await textToSpeechStream({ + modelId: ttsModelId, + inputType: "text", + accumulateSentences: true, + sentenceDelimiterPreset: "latin", // "latin" | "cjk" | "multilingual" + flushAfterMs: 400, +}); + +(async () => { + for await (const delta of completion({ modelId: llmModelId, /* ... */ }).tokenStream) { + session.write(delta); + } + session.end(); +})(); + +for await (const chunk of session) { + // chunk.buffer -> int16 PCM for this sentence / flush window + // chunk.chunkIndex -> optional sentence index + // chunk.sentenceChunk-> optional source text + if (chunk.done) break; +} +``` + +--- + +## Support the new llm addon cache api in sdk + +PR: [#1633](https://github.com/tetherto/qvac/pull/1633) + +```ts +import { + completion, + deleteCache, + LLAMA_3_2_1B_INST_Q4_0, + loadModel, + unloadModel, + VERBOSITY, +} from "@qvac/sdk"; + +type ChatMessage = { + role: string; + content: string; +}; + +const cacheKey = "trip-planner"; + +const modelId = await loadModel({ + modelSrc: LLAMA_3_2_1B_INST_Q4_0, + modelType: "llm", + modelConfig: { + ctx_size: 4096, + verbosity: VERBOSITY.ERROR, + }, +}); + +async function run(history: ChatMessage[]) { + const result = completion({ + modelId, + history, + stream: true, + kvCache: cacheKey, + }); + + let text = ""; + for await (const token of result.tokenStream) { + text += token; + } + + return text.trim(); +} + +const firstReply = await run([ + { role: "system", content: "You are a concise travel assistant." }, + { role: "user", content: "I like museums and seafood. Plan a day in Lisbon." }, +]); + +const followUpReply = await run([ + { role: "system", content: "You are a concise travel assistant." 
}, + { role: "user", content: "I like museums and seafood. Plan a day in Lisbon." }, + { role: "assistant", content: firstReply }, + { role: "user", content: "Now make it a rainy-day itinerary." }, +]); + +console.log(followUpReply); + +await deleteCache({ kvCacheKey: cacheKey }); +await unloadModel({ modelId, clearStorage: false }); +``` + +--- + +## Add img2img support to SDK diffusion API + +PR: [#1662](https://github.com/tetherto/qvac/pull/1662) + +```typescript +import { loadModel, diffusion, SD_V2_1_1B_Q8_0 } from "@qvac/sdk"; +import fs from "fs"; + +const modelId = await loadModel({ modelSrc: SD_V2_1_1B_Q8_0, modelType: "diffusion" }); + +// SD / SDXL — SDEdit +const initImage = new Uint8Array(fs.readFileSync("input.png")); +const { outputs } = diffusion({ + modelId, + prompt: "oil painting style, vibrant colors", + init_image: initImage, + strength: 0.5, // 0 = keep source, 1 = ignore source +}); + +// FLUX.2 — in-context conditioning +// NOTE: requires `prediction: "flux2_flow"` set on the model config at loadModel time. +// `strength` is ignored on this path. 
+const { outputs: fluxOutputs } = diffusion({ + modelId, + prompt: "turn into watercolor", + init_image: initImage, +}); + +const buffers = await outputs; +fs.writeFileSync("out.png", buffers[0]!); +``` + +--- + +## Harden suspend with lifecycle gate and add state() api + +PR: [#1691](https://github.com/tetherto/qvac/pull/1691) + +```typescript +import { state, suspend, resume, type LifecycleState } from "@qvac/sdk"; + +await suspend(); + +const current: LifecycleState = await state(); +// "active" | "suspending" | "suspended" | "resuming" + +if (current !== "active") { + await resume(); +} +``` + +--- + +## Propagate whisper per-segment metadata to SDK users + +PR: [#1701](https://github.com/tetherto/qvac/pull/1701) + +```typescript +// Batch — returns TranscribeSegment[] instead of string +const segments = await transcribe({ + modelId, + audioChunk: audioFilePath, + metadata: true, +}); +for (const s of segments) { + console.log(`[${s.startMs}ms → ${s.endMs}ms] id=${s.id} append=${s.append} ${s.text}`); +} + +// Duplex streaming — session iterator yields TranscribeSegment +const session = await transcribeStream({ modelId, metadata: true }); +session.write(audioChunk); +for await (const segment of session) { + console.log(segment.startMs, segment.endMs, segment.text); +} +session.end(); +``` + +```typescript +type TranscribeSegment = { + text: string; + startMs: number; + endMs: number; + append: boolean; + id: number; +}; +``` + +--- + +## Make auto KV-cache reuse completed turn history + +PR: [#1705](https://github.com/tetherto/qvac/pull/1705) + +```typescript +// New: `final.cacheableAssistantContent` — the canonical assistant +// string the SDK persisted to the auto-cache key on this turn. +// Push it back into `history` verbatim to guarantee a next-turn hit. 
+const run = completion({ modelId, history, kvCache: true }); +for await (const _ of run.tokenStream) { /* stream */ } +const final = await run.final; +const nextHistory = [ + ...history, + { + role: "assistant", + // Falls back to contentText for tool-call turns, which can't + // be auto-cached today and therefore omit the field. + content: final.cacheableAssistantContent ?? final.contentText, + }, + { role: "user", content: "follow-up question" }, +]; +``` + +--- + +## Propagate registry download retries and expose stream timeout + +PR: [#1743](https://github.com/tetherto/qvac/pull/1743) + +```ts +import { setSDKConfig } from "@qvac/sdk"; + +setSDKConfig({ + // Retry REQUEST_TIMEOUT failures up to N times before giving up. + // Set to 0 to disable retries entirely. + registryDownloadMaxRetries: 5, + + // Raise the per-block stream timeout for slow/high-latency links + // (default: 60_000 ms). + registryStreamTimeoutMs: 180_000, +}); +``` + +--- + +## Improve model type & capability system + +PR: [#1748](https://github.com/tetherto/qvac/pull/1748) + +```typescript +import type { LoadModelOptions } from "@qvac/sdk"; + +const opts: LoadModelOptions = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +``` + +```typescript +import type { LoadCustomPluginModelOptions } from "@qvac/sdk"; + +// Generic must be supplied; it pins the literal plugin string. +const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); + +// Or just drop the annotation — TS picks the right overload. +``` + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.PLUGIN_HANDLER_NOT_FOUND) { + /* ... 
*/ + } +} +``` + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) { + // Message includes the requested operation, the loaded model type, + // supported operations on the loaded model, and suggested model types. + } +} +``` + +```typescript +// Loaded an NMT model, but called translate with modelType: "llm". +// Worker routed to the NMT plugin (modelId-based) but treated the input as LLM-style. Confusing failures. +await translate({ modelId: nmtModelId, modelType: "llm", text: "..." }); +``` + +```typescript +// Drop modelType; the registered type drives behavior. +await translate({ modelId: nmtModelId, text: "..." }); + +// Or keep it — but it must match the loaded type, otherwise: +// ModelTypeMismatchError: expected "nmtcpp-translation", got "llamacpp-completion" +await translate({ modelId: nmtModelId, modelType: "nmt", text: "..." }); +``` + +```typescript +import { getLoadedModelInfo, transcribe } from "@qvac/sdk"; + +// Introspect a loaded modelId (local or delegated). Discriminated on `isDelegated`. +const info = await getLoadedModelInfo({ modelId }); + +// Preflight a built-in SDK call before sending the RPC. +// Local: handlers + modelType are authoritative. +// Delegated: handlers is [] and preflight defers to the provider. +if (info.isDelegated || info.handlers.includes("transcribeStream")) { + await transcribe({ modelId /* ... */ }); +} + +if (!info.isDelegated) { + // info.modelType, info.loadedAt + // info.displayName?, info.addonPackage? (from the plugin) + // info.name?, info.path? (from the model file) +} + +// Throws ModelNotFoundError if modelId isn't loaded. 
+``` + +--- + +## Add responseFormat for structured output + +PR: [#1768](https://github.com/tetherto/qvac/pull/1768) + +```typescript +import { completion } from "@qvac/sdk"; + +const run = completion({ + modelId, + history: [{ role: "user", content: "Extract person info: I'm Alice, 30, data engineer." }], + stream: true, + responseFormat: { + type: "json_schema", + json_schema: { + name: "Person", + schema: { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "integer" }, + occupation: { type: "string" }, + }, + required: ["name", "age", "occupation"], + additionalProperties: false, + }, + }, + }, +}); + +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); +} +const final = await run.final; +JSON.parse(final.contentText); // guaranteed schema-valid +``` + +--- + +## Sdk "dynamic" tools mode + +PR: [#1779](https://github.com/tetherto/qvac/pull/1779) + +```typescript + import { loadModel, completion, TOOLS_MODE, QWEN3_1_7B_INST_Q4 } from "@qvac/sdk"; + + // Opt into dynamic tools by setting `toolsMode` on the model config. + const modelId = await loadModel({ + modelSrc: QWEN3_1_7B_INST_Q4, + modelType: "llm", + modelConfig: { + ctx_size: 4096, + tools: true, + toolsMode: TOOLS_MODE.dynamic, // or the literal string "dynamic" + }, + }); + + const kvCache = `dynamic-tools-${Date.now()}`; + const history = [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "What's the weather in Tokyo?" }, + ]; + + // Turn 1 — weather tools available. + const turn1 = completion({ + modelId, + history, + kvCache, + stream: true, + tools: [{ name: "get_weather", description: "...", parameters: weatherSchema }], + }); + + // Turn 2 — same kvCache, completely different tools. The addon trims the + // previous tool block from the cache, so this rotation is free. + history.push({ role: "user", content: "Now check my horoscope for Aquarius." 
}); + const turn2 = completion({ + modelId, + history, + kvCache, + stream: true, + tools: [{ name: "get_horoscope", description: "...", parameters: horoscopeSchema }], + }); + ``` + +--- + +## Pre-terminate cleanup hook + stabilise mobile smoke + +PR: [#1797](https://github.com/tetherto/qvac/pull/1797) + +```typescript +// Mobile auto-close path (unchanged from caller perspective): +await close(); // now awaits worker cleanup ack before terminating worklet +``` + +--- + +## Add native tool-call dialect routing (hermes, pythonic, json) with override + +PR: [#1802](https://github.com/tetherto/qvac/pull/1802) + +```typescript +// New optional `toolDialect` parameter on completion() — force a specific +// parser chain when the SDK can't auto-detect from the model name. +import { completion } from "@qvac/sdk"; + +const result = completion({ + modelId, + history, + tools, + stream: true, + toolDialect: "pythonic", // "hermes" | "pythonic" | "json" +}); +``` + +Common override case: Llama 3.x tool-calling fine-tunes that emit the native pythonic header (`<|start_header_id|>tool_call<|end_header_id|>...<|eot_id|>`).
Auto-routing keeps these on `hermes` because most observed Llama 3.x tool-calling tunes empirically emit JSON, not pythonic — pass `toolDialect: "pythonic"` for tunes that do emit the native framing. + +```typescript +import type { ToolDialect } from "@qvac/sdk"; +``` + +--- + diff --git a/packages/sdk/changelog/0.10.0/breaking.md b/packages/sdk/changelog/0.10.0/breaking.md new file mode 100644 index 0000000000..735e816d6b --- /dev/null +++ b/packages/sdk/changelog/0.10.0/breaking.md @@ -0,0 +1,278 @@ +# 💥 Breaking Changes v0.10.0 + +## Add parallel orchestration, download dedupe, and generic companion-set support + +PR: [#1636](https://github.com/tetherto/qvac/pull/1636) + +**BEFORE:** + + +```typescript +onProgress: (progress) => { + if (progress.onnxInfo) { + console.log( + `[${progress.onnxInfo.currentFile}] ` + + `file ${progress.onnxInfo.fileIndex}/${progress.onnxInfo.totalFiles} — ` + + `${progress.onnxInfo.overallPercentage.toFixed(1)}% overall` + ); + } +} +``` + + + +**AFTER:** + + +```typescript +onProgress: (progress) => { + if (progress.fileSetInfo) { + console.log( + `[${progress.fileSetInfo.currentFile}] ` + + `file ${progress.fileSetInfo.fileIndex}/${progress.fileSetInfo.totalFiles} — ` + + `${progress.fileSetInfo.overallPercentage.toFixed(1)}% overall` + ); + } +} +``` + +## 🔌 Extensibility: adding a new companion format + +The companion pipeline is generic. Only `companions.ts` contains format-specific detection (currently ONNX). To add a new format: + +1. Add a detection function in `companions.ts` +2. Call it from `groupCompanionSets` + +Everything downstream (codegen, resolver, cache probing, storage cleanup) handles it automatically. + +## 🧪 How was it tested?
+ +- Unit tests for `resolveClearStorageTarget` — companion set paths, legacy ONNX paths, flat cache, outside-cache paths, trailing slashes, Windows backslash paths +- Unit tests for `groupCompanionSets` — ONNX + `_data`/`.data` patterns, non-ONNX models, deterministic `setKey` generation +- **Companion set smoke test** — ran Parakeet CTC and TDT end-to-end, validated all 4 cache paths (legacy `_data` probe, legacy `.data` probe, canonical fresh download, canonical cache hit) with correct transcription output on each +- **Parallel orchestration**: ran `examples/llamacpp-multimodal.ts` with labeled progress — confirmed primary and projection models download concurrently via interleaved output +- **Profiling**: ran `examples/profiling/basic.ts` — confirmed `sourceType`, `cacheHit`, `sharedTransfer`, `totalLoadTime`, `modelInitializationTime`, `checksumValidationTime` populate correctly through `buildDownloadProfilingFields()`. Ran `examples/llamacpp-multimodal.ts` with profiling enabled — confirmed aggregate stats merge correctly across primary and projection downloads +- **Cancellation**: `^C` during multimodal download cleanly aborts both active transfers with no leaked state +- Build, lint, and typecheck pass + +--- + +## Unified CompletionEvent stream as canonical completion API + +PR: [#1673](https://github.com/tetherto/qvac/pull/1673) + +**BEFORE:** + + +```typescript +// Wire response +{ type: "completionStream", token: "Hello", toolCallEvent: {...} } +{ type: "completionStream", token: "", done: true, stats: {...}, toolCalls: [...] } +``` + + + +**AFTER:** + + +```typescript +// Wire response +{ type: "completionStream", events: [{ type: "contentDelta", seq: 0, text: "Hello" }] } +{ type: "completionStream", done: true, events: [ + { type: "completionStats", seq: 5, stats: {...} }, + { type: "completionDone", seq: 6, raw: { fullText: "..." } } +]} +``` + +**Client API**: `completion()` return type is now `CompletionRun` (was anonymous object).
Legacy fields still work but are derived views. + +**BEFORE:** + +```typescript +const result = completion({ modelId, history, stream: true }); +for await (const token of result.tokenStream) { ... } +const stats = await result.stats; +``` + +**AFTER:** + +```typescript +const run = completion({ modelId, history, stream: true, captureThinking: true }); +for await (const event of run.events) { + if (event.type === "contentDelta") process.stdout.write(event.text); + if (event.type === "toolCall") console.log(event.call.name); +} +const result = await run.final; +// result.contentText, result.thinkingText, result.toolCalls, result.stats, result.raw.fullText +``` + +## 🧪 How was it tested? + +- **Unit tests**: event schema validation and wire strictness, normalizer state machine (content, thinking, tool framing, fail-open, error-finish, scoped dedupe), and client-side event aggregation with error-done rejection +- **Manual**: ran `examples/completion-events.ts` (new event-driven API) and existing legacy examples — both produce correct output +- Build and typecheck pass + +--- + +## Migrate SDK plugins to new addon constructor shape + +PR: [#1688](https://github.com/tetherto/qvac/pull/1688) + +**BEFORE:** + + +```typescript +export const myPlugin = definePlugin({ + // ... + createModel(params: CreateModelParams): PluginModelResult { + return { model, loader: null }; + }, +}); +``` + + + +**AFTER:** +_(No AFTER example was captured for this change; see PR #1688 for the new constructor shape.)_ + +--- + +## Switch delegation to direct DHT connect, drop topic end-to-end + +PR: [#1729](https://github.com/tetherto/qvac/pull/1729) + +**BEFORE:** + +- Consumer: `swarm.join(topic)` → `swarm.flush()` → wait for `connection` event matching `peerPublicKey` → filter out everyone else. +- Provider: `swarm.join(topic, { server: true })` → `discovery.flushed()` → `swarm.flush()` (full topic announce on the DHT). + + + +**AFTER:** + +- Consumer: `swarm.dht.connect(publicKey)` — direct connection, no discovery, no filtering.
+- Provider: `swarm.listen()` — binds the DHT server on the keyPair so consumers can reach it via `dht.connect(publicKey)`. No topic announce. + +--- + +## Improve model type & capability system + +PR: [#1748](https://github.com/tetherto/qvac/pull/1748) + +**BEFORE:** + + +```typescript +import type { LoadModelOptions } from "@qvac/sdk"; + +const opts: LoadModelOptions = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); +``` + + + +**AFTER:** + + +```typescript +import type { LoadCustomPluginModelOptions } from "@qvac/sdk"; + +// Generic must be supplied; it pins the literal plugin string. +const opts: LoadCustomPluginModelOptions<"my-custom-plugin"> = { + modelSrc: "/path/foo", + modelType: "my-custom-plugin", + modelConfig: { whatever: 1 }, +}; +await loadModel(opts); + +// Or just drop the annotation — TS picks the right overload. +``` + +### Wrong-model error code/message change (runtime) + +Built-in SDK operations now surface `MODEL_OPERATION_NOT_SUPPORTED` instead of `PLUGIN_HANDLER_NOT_FOUND`. Low-level `pluginInvoke` / `pluginInvokeStream` still use `PLUGIN_HANDLER_NOT_FOUND`. + +**BEFORE:** + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.PLUGIN_HANDLER_NOT_FOUND) { + /* ... */ + } +} +``` + +**AFTER:** + +```typescript +import { SDK_SERVER_ERROR_CODES } from "@qvac/sdk"; + +try { + await transcribe({ modelId: llmModelId /* ... */ }); +} catch (e) { + if ((e as { code?: number })?.code === SDK_SERVER_ERROR_CODES.MODEL_OPERATION_NOT_SUPPORTED) { + // Message includes the requested operation, the loaded model type, + // supported operations on the loaded model, and suggested model types. + } +} +``` + +`PLUGIN_HANDLER_NOT_FOUND` is still the low-level path for `pluginInvoke` / `pluginInvokeStream`.
+ +### `translate(...)` validates caller-supplied `modelType` against loaded type (runtime) + +`translate(...)` now routes by the loaded model's registered type. A mismatched caller-supplied `modelType` throws `ModelTypeMismatchError` instead of being silently mis-routed. + +**BEFORE:** + +```typescript +// Loaded an NMT model, but called translate with modelType: "llm". +// Worker routed to the NMT plugin (modelId-based) but treated the input as LLM-style. Confusing failures. +await translate({ modelId: nmtModelId, modelType: "llm", text: "..." }); +``` + +**AFTER:** + +```typescript +// Drop modelType; the registered type drives behavior. +await translate({ modelId: nmtModelId, text: "..." }); + +// Or keep it — but it must match the loaded type, otherwise: +// ModelTypeMismatchError: expected "nmtcpp-translation", got "llamacpp-completion" +await translate({ modelId: nmtModelId, modelType: "nmt", text: "..." }); +``` + +## 🔌 API Changes + +```typescript +import { getLoadedModelInfo, transcribe } from "@qvac/sdk"; + +// Introspect a loaded modelId (local or delegated). Discriminated on `isDelegated`. +const info = await getLoadedModelInfo({ modelId }); + +// Preflight a built-in SDK call before sending the RPC. +// Local: handlers + modelType are authoritative. +// Delegated: handlers is [] and preflight defers to the provider. +if (info.isDelegated || info.handlers.includes("transcribeStream")) { + await transcribe({ modelId /* ... */ }); +} + +if (!info.isDelegated) { + // info.modelType, info.loadedAt + // info.displayName?, info.addonPackage? (from the plugin) + // info.name?, info.path? (from the model file) +} + +// Throws ModelNotFoundError if modelId isn't loaded. 
+``` + +--- + diff --git a/packages/sdk/changelog/0.10.0/models.md b/packages/sdk/changelog/0.10.0/models.md new file mode 100644 index 0000000000..fe7acc2b40 --- /dev/null +++ b/packages/sdk/changelog/0.10.0/models.md @@ -0,0 +1,20 @@ +# 📦 Model Changes v0.10.0 + +## Added Models + +``` +NMT_Q0F16 through NMT_Q0F16_9 +NMT_Q4_0 through NMT_Q4_0_12+ +``` + +## Removed Models + +``` +MARIAN_OPUS_* +``` + +--- + +### Related PRs + +- [#1700](https://github.com/tetherto/qvac/pull/1700) - Regenerate model registry with companion-set metadata diff --git a/packages/sdk/package.json b/packages/sdk/package.json index f241b7637e..e54eb963cf 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -1,6 +1,6 @@ { "name": "@qvac/sdk", - "version": "0.9.1", + "version": "0.10.0", "license": "Apache-2.0", "repository": { "type": "git", diff --git a/scripts/sdk/generate-changelog-sdk-pod.cjs b/scripts/sdk/generate-changelog-sdk-pod.cjs index da3ec14151..363464ecd1 100644 --- a/scripts/sdk/generate-changelog-sdk-pod.cjs +++ b/scripts/sdk/generate-changelog-sdk-pod.cjs @@ -33,6 +33,54 @@ const SECTIONS = [ { key: "infra", title: "⚙️ Infrastructure" }, ]; +/** + * Maximum number of model entries to inline per section (Added/Updated/Removed) + * in the main CHANGELOG.md. Anything beyond is collapsed to "(and N more)" and + * the reader is expected to follow the link to models.md for the full list. + */ +const MAX_INLINE_MODELS = 5; + +/** + * Maximum number of bullets to include per section in the Slack announcement + * post. Anything beyond is collapsed to "... And much more, see full list in + * changelog :memo:". Sections with 10 or fewer entries are emitted verbatim; + * the "And much more" suffix only appears when a section has *more than 10* + * entries. + */ +const MAX_ANNOUNCEMENT_BULLETS = 10; + +/** + * Map of section keys to Slack-style emoji headings used in the + * announcement-post.txt template. 
Slack does not render the unicode emojis + * we use in CHANGELOG.md, so we translate to shortcodes here. + */ +const SLACK_SECTION_HEADINGS = { + feat: ":sparkles: Features", + api: ":electric_plug: API", + fix: ":ladybug: Fixes", + mod: ":package: Models", + doc: ":blue_book: Docs", + test: ":test_tube: Tests", + chore: ":broom: Chores", + infra: ":gear: Infrastructure", +}; + +/** + * Map from the unicode emoji used in CHANGELOG.md headings back to the + * internal section key, so the announcement generator can parse a freshly + * written CHANGELOG.md without re-running the upstream PR fetch. + */ +const CHANGELOG_HEADING_TO_KEY = { + "✨": "feat", + "🔌": "api", + "🐞": "fix", + "📦": "mod", + "📘": "doc", + "🧪": "test", + "🧹": "chore", + "⚙️": "infra", +}; + /** * Extract code blocks from markdown * @param {string} text @@ -302,6 +350,105 @@ function capitalize(str) { return str.charAt(0).toUpperCase() + str.slice(1); } +/** + * Detect a companion entry in a Models section line. Companions (vocab files, + * lexicons, raw data shards, metadata blobs, etc.) ship alongside a primary + * model but aren't independently usable models, so we exclude them from the + * changelog and announcement post — only first-class models should be + * surfaced to readers. + * + * Recognises both: + * - Constant-name suffixes: `*_LEX`, `*_VOCAB`, `*_DATA`, `*_METADATA` + * - Free-form descriptions containing the word "companion" + * + * @param {string} entry - One Added/Updated/Removed list line + * @returns {boolean} + */ +function isCompanionEntry(entry) { + if (!entry) return false; + if (/companion/i.test(entry)) return true; + if (/_lex\b/i.test(entry)) return true; + if (/_vocab\b/i.test(entry)) return true; + if (/_data\b/i.test(entry)) return true; + if (/_metadata\b/i.test(entry)) return true; + return false; +} + +/** + * Strip "(N entries …)" / "(N entries — …)" suffixes commonly used in + * free-form Models sections (e.g. PR #1700-style summaries). 
The reader can + * find exact counts in models.md if they need them; the changelog should + * stay focused on the model identities themselves. + * + * @param {string} entry + * @returns {string} + */ +function stripEntryCount(entry) { + if (!entry) return entry; + return entry + .replace(/\s*\(\s*\d+\s*entries?(?:\s*[—–-][^)]*)?\)\s*/gi, "") + .trim(); +} + +/** + * Apply changelog model-section policy to a raw list of entries: drop + * companions, strip entry-count suffixes, drop empty results. + * + * @param {string[]} entries + * @returns {string[]} + */ +function cleanModelEntries(entries) { + if (!entries || entries.length === 0) return []; + return entries + .filter((e) => !isCompanionEntry(e)) + .map((e) => stripEntryCount(e)) + .filter((e) => e && e.length > 0); +} + +/** + * Format a single model section (Added / Updated / Removed) for inline display + * in the main CHANGELOG.md. Trims to MAX_INLINE_MODELS entries with a + * "(and N more)" suffix. Returns null if the section is empty. + * + * Companions and entry counts are filtered upstream by `cleanModelEntries`. + * + * @param {string} label - e.g. "Added", "Updated", "Removed" + * @param {string[]} names + * @returns {string|null} + */ +function summarizeModelList(label, names) { + const cleaned = cleanModelEntries(names); + if (cleaned.length === 0) return null; + const shown = cleaned.slice(0, MAX_INLINE_MODELS); + const extra = cleaned.length - shown.length; + let summary = `${label}: ${shown.join(", ")}`; + if (extra > 0) summary += ` (and ${extra} more)`; + return summary; +} + +/** + * Build a per-section summary (Added / Updated / Removed) of the model lists + * from a PR body, suitable for use as indented continuation lines under a + * CHANGELOG.md bullet. Returns null if the PR has no Models section or all + * sections are empty after companion/entry-count filtering. + * + * Returns an array of lines (one per non-empty section). 
The caller is + * responsible for indenting them appropriately under the bullet. + * + * @param {string} prBody + * @returns {string[]|null} + */ +function buildInlineModelSummary(prBody) { + const models = extractModelsSection(prBody); + if (!models) return null; + const parts = [ + summarizeModelList("Added", models.added), + summarizeModelList("Updated", models.updated), + summarizeModelList("Removed", models.removed), + ].filter(Boolean); + return parts.length > 0 ? parts : null; +} + /** * Generate changelog entry * @param {object} pr @@ -337,6 +484,20 @@ function generateChangelogEntry( entry += ` - See ${links.join(", ")}`; } + // For [mod] PRs, append the trimmed Added/Updated/Removed model lists as + // indented continuation lines under the bullet. Companions and entry-count + // suffixes are filtered out by `buildInlineModelSummary`, so what shows up + // here are first-class model identities only — the full list (including + // companions, if the PR author chose to keep them) lives in models.md. + if (parsed.tags.includes("mod")) { + const modelLines = buildInlineModelSummary(pr.body); + if (modelLines) { + for (const line of modelLines) { + entry += `\n ${line}`; + } + } + } + return entry; } @@ -486,10 +647,12 @@ function generateChangelogFiles(packageName, version, prs, outputDir, baseRef) { } } - // Sort alphabetically - const addedList = [...allAdded].sort(); - const updatedList = [...allUpdated].sort(); - const removedList = [...allRemoved].sort(); + // Apply the changelog model-section policy: drop companions, strip + // entry-count suffixes. Keeps models.md aligned with what the main + // CHANGELOG.md surfaces — only first-class model identities. 
+ const addedList = cleanModelEntries([...allAdded].sort()); + const updatedList = cleanModelEntries([...allUpdated].sort()); + const removedList = cleanModelEntries([...allRemoved].sort()); let modelsMd = `# 📦 Model Changes v${version}\n\n`; @@ -605,6 +768,25 @@ function groupModelsByPrefix(names) { return groups; } +/** + * Detect a backmerge PR subject. + * + * Backmerges merge a release branch back into main; their content is already + * documented in the release branch's own changelog, so listing them here is + * noise. Recognises the QVAC convention (`Backmerge release sdk 0.9.1`) plus + * common variants like `Merge release-sdk-0.9.1 into main`. + * + * @param {string} subject - PR subject (after prefix/tags) + * @returns {boolean} + */ +function isBackmergeSubject(subject) { + if (!subject) return false; + const s = subject.trim().toLowerCase(); + if (s.startsWith("backmerge")) return true; + if (/^merge\s+release[\s-]/.test(s)) return true; + return false; +} + /** * Process raw PRs with SDK-specific validation and filtering * @param {Array<{number: number, title: string, body: string, url: string}>} rawPRs @@ -631,6 +813,13 @@ function processSDKPRs(rawPRs) { continue; } + if (isBackmergeSubject(validation.parsed.subject)) { + console.log( + ` ⏭️ PR #${pr.number} is a backmerge, excluding from changelog`, + ); + continue; + } + prs.push({ number: pr.number, title: pr.title, @@ -726,6 +915,252 @@ function rebuildRootChangelog(packageName) { ); } +/** + * Parse a generated CHANGELOG.md into structured sections with bullet entries. + * Used by the announcement-post generator so it can transform the canonical + * release changelog without re-fetching PRs from GitHub. 
+ * + * @param {string} markdown - Contents of CHANGELOG.md + * @returns {{ + * version: string|null, + * releaseDate: string|null, + * sections: Array<{ key: string, heading: string, bullets: Array<{ + * text: string, + * prNumber: string|null, + * prUrl: string|null, + * isBreaking: boolean, + * isApi: boolean, + * isModels: boolean, + * }> }>, + * }} + */ +function parseChangelogMarkdown(markdown) { + const out = { version: null, releaseDate: null, sections: [] }; + + const versionMatch = markdown.match(/^# Changelog v(\d+\.\d+\.\d+)/m); + if (versionMatch) out.version = versionMatch[1]; + + const dateMatch = markdown.match(/^Release Date:\s*(\S+)/m); + if (dateMatch) out.releaseDate = dateMatch[1]; + + const lines = markdown.split("\n"); + let current = null; + let currentBullet = null; + + for (const rawLine of lines) { + const line = rawLine.replace(/\r$/, ""); + + // ## + const headingMatch = line.match(/^##\s+(\S+)\s+(.+?)\s*$/); + if (headingMatch) { + const [, emoji, title] = headingMatch; + const key = CHANGELOG_HEADING_TO_KEY[emoji]; + if (key) { + current = { key, heading: `${emoji} ${title}`, bullets: [] }; + out.sections.push(current); + currentBullet = null; + continue; + } + // Unknown heading — close the current section so stray bullets don't + // get attached to a previous, unrelated section. + current = null; + currentBullet = null; + continue; + } + + if (!current) continue; + + if (line.startsWith("- ")) { + // New bullet; flush the running bullet reference. + const prMatch = line.match(/\(see PR \[#(\d+)\]\(([^)]+)\)\)/); + const prNumber = prMatch ? prMatch[1] : null; + const prUrl = prMatch ? prMatch[2] : null; + + let text = line.slice(2); + if (prMatch) { + text = text.slice(0, prMatch.index - 2).trim(); + } + text = text.replace(/\s*-\s*See\s+\[.*$/, "").trim(); + text = text.replace(/\.+$/, "").trim(); + + const linkSuffix = line.slice( + prMatch ? 
prMatch.index + prMatch[0].length : 0, + ); + const isBreaking = /\[breaking changes\]/i.test(linkSuffix); + const isApi = /\[API changes\]/i.test(linkSuffix); + const isModels = /\[model changes\]/i.test(linkSuffix); + + currentBullet = { + text, + prNumber, + prUrl, + isBreaking, + isApi, + isModels, + continuation: [], + }; + current.bullets.push(currentBullet); + continue; + } + + // Indented continuation line (markdown convention: 2+ leading spaces or + // tab under the bullet). Preserve trimmed content so the announcement + // formatter can re-indent for Slack. + if (currentBullet && /^(\s{2,}|\t)/.test(line) && line.trim().length > 0) { + currentBullet.continuation.push(line.trim()); + continue; + } + + // Blank line or other content — close the running bullet so subsequent + // indented text doesn't accidentally attach to an unrelated bullet. + if (line.trim().length === 0) { + currentBullet = null; + } + } + + return out; +} + +/** + * Format a single bullet for the Slack announcement post. + * + * Continuation lines (e.g. `Added: …` / `Removed: …` for [mod] PRs) are + * preserved as separate lines indented by two spaces under the bullet. + * + * @param {{ + * text: string, + * prUrl: string|null, + * isBreaking: boolean, + * continuation?: string[], + * }} bullet + * @returns {string} + */ +function formatAnnouncementBullet(bullet) { + let line = `• ${bullet.text}`; + if (bullet.prUrl) line += ` (<${bullet.prUrl}>)`; + if (bullet.isBreaking) line += ` :boom: breaking`; + + if (bullet.continuation && bullet.continuation.length > 0) { + for (const cont of bullet.continuation) { + line += `\n ${cont}`; + } + } + + return line; +} + +/** + * Generate `announcement-post.txt` from a per-version CHANGELOG.md. 
+ * + * The output is plaintext sized to be copy-pasted into Slack: shortcode + * emojis, `<url>` link wrapping (suppresses Slack unfurl), bullet rows kept + * on a single line, sections capped at MAX_ANNOUNCEMENT_BULLETS with an + * "...And much more" suffix when truncated. + * + * @param {string} packageName + * @param {string} version + * @returns {string|null} The output path on success, or null if CHANGELOG.md + * for the requested version wasn't found. + */ +function generateAnnouncementPost(packageName, version) { + const repoRoot = getRepoRoot(); + const versionDir = path.join( + repoRoot, + "packages", + packageName, + "changelog", + version, + ); + const changelogPath = path.join(versionDir, "CHANGELOG.md"); + + if (!fs.existsSync(changelogPath)) { + console.warn(`⚠️ No CHANGELOG.md found at ${changelogPath}`); + return null; + } + + const markdown = fs.readFileSync(changelogPath, "utf8"); + const parsed = parseChangelogMarkdown(markdown); + const releaseDate = + parsed.releaseDate || new Date().toISOString().split("T")[0]; + + const repoUrl = "https://github.com/tetherto/qvac"; + const npmName = `@qvac/${packageName}`; + const tagName = `${packageName}-v${version}`; + const changelogTreeUrl = `${repoUrl}/tree/main/packages/${packageName}/changelog/${version}`; + const breakingMdUrl = `${repoUrl}/blob/main/packages/${packageName}/changelog/${version}/breaking.md`; + const releaseTagUrl = `${repoUrl}/releases/tag/${tagName}`; + const npmUrl = `https://www.npmjs.com/package/${npmName}/v/${version}`; + + const hasBreaking = parsed.sections.some((s) => + s.bullets.some((b) => b.isBreaking), + ); + + let post = ""; + + // Header + post += `:qvac: SDK ${version} :rocket: NPM Public release\n\n`; + + // Links + post += `:package: NPM: ${npmUrl}\n`; + post += `:technologist: Github release: ${releaseTagUrl}\n`; + post += `:page_facing_up: Full Changelog: ${changelogTreeUrl}\n\n`; + + // Breaking + if (hasBreaking) { + post += `:warning: Breaking Changes\n`; + 
/**
 * Resolve the current package version from
 * `packages/<packageName>/package.json` under the repo root.
 *
 * @param {string} packageName
 * @returns {string} The non-empty `version` field of the package manifest.
 * @throws {Error} If the manifest has no non-empty string `version`. Errors
 *   from reading or parsing the file propagate unchanged.
 */
function readPackageVersion(packageName) {
  const pkgPath = path.join(
    getRepoRoot(),
    "packages",
    packageName,
    "package.json",
  );
  const { version } = JSON.parse(fs.readFileSync(pkgPath, "utf8"));

  // Fail here with the offending file named, rather than handing `undefined`
  // to callers that pass the result straight into path.join().
  if (typeof version !== "string" || version.length === 0) {
    throw new Error(`No "version" field found in ${pkgPath}`);
  }

  return version;
}
0 : 1); + } + if (!params.package) { console.error("Usage:"); console.error( @@ -755,7 +1201,13 @@ async function main() { ); console.error(" --base-version Version label for base commit"); console.error(" --release-type minor or patch (auto-detected from package.json version)"); - console.error(" --update-root-changelog Update root CHANGELOG.md"); + console.error(" --update-root-changelog Update root CHANGELOG.md"); + console.error( + " --generate-announcement-post Generate announcement-post.txt for the package's current version", + ); + console.error( + " --version Override version when used with --generate-announcement-post", + ); process.exit(1); } @@ -821,5 +1273,7 @@ module.exports = { generateChangelogEntry, generateChangelogFiles, processSDKPRs, + parseChangelogMarkdown, + generateAnnouncementPost, SECTIONS, };