tetherto · donriddo · Mar 12, 2026 · Mar 12, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/packages/lib-infer-diffusion/index.js b/packages/lib-infer-diffusion/index.js
@@ -72,14 +72,15 @@ class ImgStableDiffusion extends BaseInference {
       // for SD3 split) the caller is using a pure diffusion GGUF that must be
       // loaded via diffusion_model_path.
       const isSplitLayout = !!this._llmModel || !!this._t5XxlModel
+      const resolve = (name) => name ? (path.isAbsolute(name) ? name : path.join(this._diskPath, name)) : ''
       const configurationParams = {
-        path: isSplitLayout ? '' : path.join(this._diskPath, this._modelName),
-        diffusionModelPath: isSplitLayout ? path.join(this._diskPath, this._modelName) : '',
-        clipLPath: this._clipLModel ? path.join(this._diskPath, this._clipLModel) : '',
-        clipGPath: this._clipGModel ? path.join(this._diskPath, this._clipGModel) : '',
-        t5XxlPath: this._t5XxlModel ? path.join(this._diskPath, this._t5XxlModel) : '',
-        llmPath: this._llmModel ? path.join(this._diskPath, this._llmModel) : '',
-        vaePath: this._vaeModel ? path.join(this._diskPath, this._vaeModel) : '',
+        path: isSplitLayout ? '' : resolve(this._modelName),
+        diffusionModelPath: isSplitLayout ? resolve(this._modelName) : '',
+        clipLPath: resolve(this._clipLModel),
+        clipGPath: resolve(this._clipGModel),
+        t5XxlPath: resolve(this._t5XxlModel),
+        llmPath: resolve(this._llmModel),
+        vaePath: resolve(this._vaeModel),
         config: this._config
       }
 

diff --git a/packages/sdk/bun.lock b/packages/sdk/bun.lock
diff --git a/packages/sdk/client/api/generation.ts b/packages/sdk/client/api/generation.ts
@@ -0,0 +1,199 @@
+import {
+  generationStreamResponseSchema,
+  type GenerationStreamRequest,
+  type GenerationClientParams,
+  type DiffusionStats,
+} from "@/schemas";
+import { stream as streamRpc } from "@/client/rpc/rpc-client";
+
+export interface GenerationProgressTick {
+  step: number;
+  totalSteps: number;
+  elapsedMs: number;
+}
+
+interface GenerationResult {
+  outputStream: AsyncGenerator<{ data: string; outputIndex: number }>;
+  progressStream: AsyncGenerator<GenerationProgressTick>;
+  outputs: Promise<Buffer[]>;
+  stats: Promise<DiffusionStats | undefined>;
+}
+
+/**
+ * Generate outputs using a loaded diffusion model.
+ *
+ * Supports text-to-image, image-to-image, and (future) video generation.
+ * img2img is activated by providing `init_image` (and optionally `strength`).
+ *
+ * @param params - Generation parameters
+ * @param params.modelId - The identifier of the loaded diffusion model
+ * @param params.prompt - Text prompt describing the desired output
+ * @param params.init_image - Source image for img2img (base64 string or Buffer). Omit for txt2img.
+ * @param params.strength - How much to transform the source: 0 = keep, 1 = ignore. Only used with init_image.
+ * @param params.stream - Whether to stream outputs as they arrive (true) or return all at once (false). Defaults to false.
+ * @returns Object with outputStream generator, progressStream generator, outputs promise, and stats promise
+ * @example
+ * ```typescript
+ * // txt2img (non-streaming)
+ * const { outputs, stats } = generation({ modelId, prompt: "a cat" });
+ * const buffers = await outputs;
+ *
+ * // txt2img (streaming with progress)
+ * const { outputStream, progressStream } = generation({ modelId, prompt: "a cat", stream: true });
+ * // consume progress in parallel
+ * (async () => { for await (const { step, totalSteps } of progressStream) console.log(`${step}/${totalSteps}`); })();
+ * for await (const { data, outputIndex } of outputStream) {
+ *   fs.writeFileSync(`output_${outputIndex}.png`, Buffer.from(data, "base64"));
+ * }
+ *
+ * // img2img
+ * const { outputs } = generation({
+ *   modelId,
+ *   prompt: "watercolor style",
+ *   init_image: fs.readFileSync("photo.jpg"),
+ *   strength: 0.75,
+ * });
+ * ```
+ */
+export function generation(params: GenerationClientParams): GenerationResult {
+  const { stream: streaming, init_image, ...rest } = params;
+
+  const request: GenerationStreamRequest = {
+    type: "generationStream",
+    ...rest,
+    ...(init_image != null && {
+      init_image:
+        typeof init_image === "string"
+          ? init_image
+          : init_image.toString("base64"),
+    }),
+  };
+
+  let statsResolver: (value: DiffusionStats | undefined) => void = () => {};
+  let statsRejecter: (error: unknown) => void = () => {};
+  const statsPromise = new Promise<DiffusionStats | undefined>(
+    (resolve, reject) => {
+      statsResolver = resolve;
+      statsRejecter = reject;
+    },
+  );
+  statsPromise.catch(() => {});
+
+  const outputQueue: { data: string; outputIndex: number }[] = [];
+  const progressQueue: GenerationProgressTick[] = [];
+  const collectedBuffers: Buffer[] = [];
+  let outputDone = false;
+  let progressDone = false;
+  let outputResolve: (() => void) | null = null;
+  let progressResolve: (() => void) | null = null;
+  let streamError: Error | null = null;
+
+  let outputsResolver: (value: Buffer[]) => void = () => {};
+  let outputsRejecter: (error: unknown) => void = () => {};
+  const outputsPromise = new Promise<Buffer[]>((resolve, reject) => {
+    outputsResolver = resolve;
+    outputsRejecter = reject;
+  });
+  outputsPromise.catch(() => {});
+
+  const processResponses = async () => {
+    try {
+      for await (const response of streamRpc(request)) {
+        if (
+          response &&
+          typeof response === "object" &&
+          "type" in response &&
+          response.type === "generationStream"
+        ) {
+          const parsed = generationStreamResponseSchema.parse(response);
+
+          if (parsed.step != null && parsed.totalSteps != null && parsed.elapsedMs != null) {
+            progressQueue.push({ step: parsed.step, totalSteps: parsed.totalSteps, elapsedMs: parsed.elapsedMs });
+            if (progressResolve) {
+              progressResolve();
+              progressResolve = null;
+            }
+          }
+
+          if (parsed.data) {
+            const outputEntry = { data: parsed.data, outputIndex: parsed.outputIndex ?? 0 };
+            outputQueue.push(outputEntry);
+            collectedBuffers.push(Buffer.from(parsed.data, "base64"));
+            if (outputResolve) {
+              outputResolve();
+              outputResolve = null;
+            }
+          }
+
+          if (parsed.done) {
+            statsResolver(parsed.stats);
+            outputsResolver(collectedBuffers);
+          }
+        }
+      }
+    } catch (error) {
+      streamError = error instanceof Error ? error : new Error(String(error));
+      statsRejecter(streamError);
+      outputsRejecter(streamError);
+    }
+
+    outputDone = true;
+    progressDone = true;
+    if (outputResolve) {
+      outputResolve();
+      outputResolve = null;
+    }
+    if (progressResolve) {
+      progressResolve();
+      progressResolve = null;
+    }
+  };
+
+  void processResponses();
+
+  const progressStream = (async function* (): AsyncGenerator<GenerationProgressTick> {
+    while (true) {
+      if (progressQueue.length > 0) {
+        yield progressQueue.shift()!;
+      } else if (progressDone) {
+        if (streamError) throw streamError;
+        return;
+      } else {
+        await new Promise<void>((resolve) => { progressResolve = resolve; });
+      }
+    }
+  })();
+
+  if (streaming) {
+    const outputStream = (async function* () {
+      while (true) {
+        if (outputQueue.length > 0) {
+          yield outputQueue.shift()!;
+        } else if (outputDone) {
+          if (streamError) throw streamError;
+          return;
+        } else {
+          await new Promise<void>((resolve) => { outputResolve = resolve; });
+        }
+      }
+    })();
+
+    return {
+      outputStream,
+      progressStream,
+      outputs: outputsPromise,
+      stats: statsPromise,
+    };
+  }
+
+  const outputStream = (async function* () {
+    // Empty generator for non-streaming mode
+  })();
+
+  return {
+    outputStream,
+    progressStream,
+    outputs: outputsPromise,
+    stats: statsPromise,
+  };
+}
diff --git a/packages/sdk/client/api/index.ts b/packages/sdk/client/api/index.ts
@@ -26,6 +26,7 @@ export { textToSpeech } from "./text-to-speech";
 export { getModelInfo } from "./get-model-info";
 export { ocr } from "./ocr";
 export { invokePlugin, invokePluginStream } from "./invoke-plugin";
+export { generation, type GenerationProgressTick } from "./generation";
 export {
   modelRegistryList,
   modelRegistrySearch,

diff --git a/packages/sdk/examples/diffusion-flux2-klein.ts b/packages/sdk/examples/diffusion-flux2-klein.ts
@@ -0,0 +1,49 @@
+import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
+import fs from "fs";
+import path from "path";
+
+// FLUX.2 [klein] uses a split-layout: separate diffusion model + LLM text encoder + VAE
+const diffusionModelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;
+const llmModelSrc = process.argv[3] || QWEN3_4B_Q4_K_M;
+const vaeModelSrc = process.argv[4] || FLUX_2_KLEIN_4B_VAE;
+const prompt = process.argv[5] || "a futuristic city at sunset, photorealistic";
+const outputDir = process.argv[6] || ".";
+
+console.log("Loading FLUX.2 [klein] split-layout model...");
+
+const modelId = await loadModel({
+  modelSrc: diffusionModelSrc,
+  modelType: "diffusion",
+  modelConfig: {
+    device: "gpu",
+    threads: 4,
+    llmModelSrc,
+    vaeModelSrc,
+  },
+  onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
+});
+console.log(`Model loaded: ${modelId}`);
+
+console.log(`\nGenerating: "${prompt}"`);
+
+const { outputStream, stats } = generation({
+  modelId,
+  prompt,
+  width: 512,
+  height: 512,
+  steps: 20,
+  guidance: 3.5,
+  seed: -1,
+  stream: true,
+});
+
+for await (const { data, outputIndex } of outputStream) {
+  const outputPath = path.join(outputDir, `flux2_${outputIndex}.png`);
+  fs.writeFileSync(outputPath, Buffer.from(data, "base64"));
+  console.log(`Saved: ${outputPath}`);
+}
+
+console.log("\nStats:", await stats);
+await unloadModel({ modelId, clearStorage: false });
+console.log("Done.");
+process.exit(0);
diff --git a/packages/sdk/examples/diffusion-img2img.ts b/packages/sdk/examples/diffusion-img2img.ts
@@ -0,0 +1,54 @@
+import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
+import fs from "fs";
+import path from "path";
+
+const modelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;
+const inputImagePath = process.argv[3] || path.resolve("examples/image/test.jpg");
+
+if (!fs.existsSync(inputImagePath)) {
+  console.error(`Input image not found: ${inputImagePath}`);
+  console.error(
+    "Usage: bun run examples/diffusion-img2img.ts [model-src] [input-image] [prompt] [strength] [output-dir]",
+  );
+  process.exit(1);
+}
+
+const prompt = process.argv[4] || "watercolor painting style";
+const strength = parseFloat(process.argv[5] || "0.75");
+const outputDir = process.argv[6] || ".";
+
+console.log(`Loading diffusion model...`);
+// FLUX.2 models require companion LLM + VAE models
+const modelId = await loadModel({
+  modelSrc,
+  modelType: "diffusion",
+  modelConfig: { device: "gpu", threads: 4, llmModelSrc: QWEN3_4B_Q4_K_M, vaeModelSrc: FLUX_2_KLEIN_4B_VAE },
+  onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
+});
+console.log(`Model loaded: ${modelId}`);
+
+console.log(`\nimg2img from: ${inputImagePath}`);
+console.log(`Prompt: "${prompt}", strength: ${strength}`);
+
+const { outputs, stats } = generation({
+  modelId,
+  prompt,
+  init_image: fs.readFileSync(inputImagePath),
+  strength,
+  width: 512,
+  height: 512,
+  steps: 20,
+  cfg_scale: 7.0,
+});
+
+const buffers = await outputs;
+for (let i = 0; i < buffers.length; i++) {
+  const outputPath = path.join(outputDir, `img2img_${i}.png`);
+  fs.writeFileSync(outputPath, buffers[i]!);
+  console.log(`Saved: ${outputPath}`);
+}
+
+console.log("\nStats:", await stats);
+await unloadModel({ modelId, clearStorage: false });
+console.log("Done.");
+process.exit(0);
diff --git a/packages/sdk/examples/diffusion-txt2img.ts b/packages/sdk/examples/diffusion-txt2img.ts
@@ -0,0 +1,44 @@
+import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
+import fs from "fs";
+import path from "path";
+
+const modelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;
+
+const prompt =
+  process.argv[3] ||
+  "a photo of a cat sitting on a windowsill, golden hour lighting";
+const outputDir = process.argv[4] || ".";
+
+console.log(`Loading diffusion model...`);
+// FLUX.2 models require companion LLM + VAE models
+const modelId = await loadModel({
+  modelSrc,
+  modelType: "diffusion",
+  modelConfig: { device: "gpu", threads: 4, llmModelSrc: QWEN3_4B_Q4_K_M, vaeModelSrc: FLUX_2_KLEIN_4B_VAE },
+  onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
+});
+console.log(`Model loaded: ${modelId}`);
+
+console.log(`\nGenerating: "${prompt}"`);
+
+const { outputStream, stats } = generation({
+  modelId,
+  prompt,
+  width: 512,
+  height: 512,
+  steps: 20,
+  cfg_scale: 7.0,
+  seed: -1,
+  stream: true,
+});
+
+for await (const { data, outputIndex } of outputStream) {
+  const outputPath = path.join(outputDir, `output_${outputIndex}.png`);
+  fs.writeFileSync(outputPath, Buffer.from(data, "base64"));
+  console.log(`Saved: ${outputPath}`);
+}
+
+console.log("\nStats:", await stats);
+await unloadModel({ modelId, clearStorage: false });
+console.log("Done.");
+process.exit(0);