Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
e6af32d
feat: add diffusion SDK plugin integration
donriddo Mar 12, 2026
aeeffc7
feat(diffusion): consolidate SDK plugin, fix sampling_method schema, …
donriddo Mar 12, 2026
4657cd2
fix(diffusion): register generationStream in bare-client handler map
donriddo Mar 13, 2026
5b22891
feat(diffusion): add diffusion naming handler for update-models codegen
donriddo Mar 13, 2026
5b356ad
feat(diffusion): sync registry models and use FLUX constant in tests
donriddo Mar 13, 2026
c469b71
fix(diffusion): prevent statsPromise hang and fix lint issues
donriddo Mar 15, 2026
7d245e0
revert: remove non-matching patterns from generation client
donriddo Mar 15, 2026
2955e88
chore: remove unrelated model history file
donriddo Mar 15, 2026
38c2c86
fix: configure FLUX companion models and GPU device for diffusion tests
donriddo Mar 15, 2026
8a9608e
fix(examples): configure FLUX companion models consistently across al…
donriddo Mar 16, 2026
7637e20
fix(tests): use llm addon elephant.jpg for img2img test fixture
donriddo Mar 16, 2026
48ee112
fix(examples): use path.resolve for img2img default image path
donriddo Mar 16, 2026
7a7e8a5
fix(tests): migrate generation executor to ResourceManager pattern
donriddo Mar 16, 2026
6c8626f
feat(api): expose progressStream in generation() client helper
donriddo Mar 16, 2026
7481157
refactor(api): use background fan-out loop for generation() streams
donriddo Mar 16, 2026
ab1b4f4
chore: regenerate bun.lock and models registry after rebase
donriddo Mar 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions packages/lib-infer-diffusion/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,15 @@ class ImgStableDiffusion extends BaseInference {
// for SD3 split) the caller is using a pure diffusion GGUF that must be
// loaded via diffusion_model_path.
const isSplitLayout = !!this._llmModel || !!this._t5XxlModel
const resolve = (name) => name ? (path.isAbsolute(name) ? name : path.join(this._diskPath, name)) : ''
const configurationParams = {
path: isSplitLayout ? '' : path.join(this._diskPath, this._modelName),
diffusionModelPath: isSplitLayout ? path.join(this._diskPath, this._modelName) : '',
clipLPath: this._clipLModel ? path.join(this._diskPath, this._clipLModel) : '',
clipGPath: this._clipGModel ? path.join(this._diskPath, this._clipGModel) : '',
t5XxlPath: this._t5XxlModel ? path.join(this._diskPath, this._t5XxlModel) : '',
llmPath: this._llmModel ? path.join(this._diskPath, this._llmModel) : '',
vaePath: this._vaeModel ? path.join(this._diskPath, this._vaeModel) : '',
path: isSplitLayout ? '' : resolve(this._modelName),
diffusionModelPath: isSplitLayout ? resolve(this._modelName) : '',
clipLPath: resolve(this._clipLModel),
clipGPath: resolve(this._clipGModel),
t5XxlPath: resolve(this._t5XxlModel),
llmPath: resolve(this._llmModel),
vaePath: resolve(this._vaeModel),
config: this._config
}

Expand Down
124 changes: 124 additions & 0 deletions packages/sdk/bun.lock

Large diffs are not rendered by default.

199 changes: 199 additions & 0 deletions packages/sdk/client/api/generation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import {
generationStreamResponseSchema,
type GenerationStreamRequest,
type GenerationClientParams,
type DiffusionStats,
} from "@/schemas";
import { stream as streamRpc } from "@/client/rpc/rpc-client";

export interface GenerationProgressTick {
step: number;
totalSteps: number;
elapsedMs: number;
}

interface GenerationResult {
outputStream: AsyncGenerator<{ data: string; outputIndex: number }>;
progressStream: AsyncGenerator<GenerationProgressTick>;
outputs: Promise<Buffer[]>;
stats: Promise<DiffusionStats | undefined>;
}

/**
* Generate outputs using a loaded diffusion model.
*
* Supports text-to-image, image-to-image, and (future) video generation.
* img2img is activated by providing `init_image` (and optionally `strength`).
*
* @param params - Generation parameters
* @param params.modelId - The identifier of the loaded diffusion model
* @param params.prompt - Text prompt describing the desired output
* @param params.init_image - Source image for img2img (base64 string or Buffer). Omit for txt2img.
* @param params.strength - How much to transform the source: 0 = keep, 1 = ignore. Only used with init_image.
* @param params.stream - Whether to stream outputs as they arrive (true) or return all at once (false). Defaults to false.
* @returns Object with outputStream generator, progressStream generator, outputs promise, and stats promise
* @example
* ```typescript
* // txt2img (non-streaming)
* const { outputs, stats } = generation({ modelId, prompt: "a cat" });
* const buffers = await outputs;
*
* // txt2img (streaming with progress)
* const { outputStream, progressStream } = generation({ modelId, prompt: "a cat", stream: true });
* // consume progress in parallel
* (async () => { for await (const { step, totalSteps } of progressStream) console.log(`${step}/${totalSteps}`); })();
* for await (const { data, outputIndex } of outputStream) {
* fs.writeFileSync(`output_${outputIndex}.png`, Buffer.from(data, "base64"));
* }
*
* // img2img
* const { outputs } = generation({
* modelId,
* prompt: "watercolor style",
* init_image: fs.readFileSync("photo.jpg"),
* strength: 0.75,
* });
* ```
*/
export function generation(params: GenerationClientParams): GenerationResult {
const { stream: streaming, init_image, ...rest } = params;

const request: GenerationStreamRequest = {
type: "generationStream",
...rest,
...(init_image != null && {
init_image:
typeof init_image === "string"
? init_image
: init_image.toString("base64"),
}),
};

let statsResolver: (value: DiffusionStats | undefined) => void = () => {};
let statsRejecter: (error: unknown) => void = () => {};
const statsPromise = new Promise<DiffusionStats | undefined>(
(resolve, reject) => {
statsResolver = resolve;
statsRejecter = reject;
},
);
statsPromise.catch(() => {});

const outputQueue: { data: string; outputIndex: number }[] = [];
const progressQueue: GenerationProgressTick[] = [];
const collectedBuffers: Buffer[] = [];
let outputDone = false;
let progressDone = false;
let outputResolve: (() => void) | null = null;
let progressResolve: (() => void) | null = null;
let streamError: Error | null = null;

let outputsResolver: (value: Buffer[]) => void = () => {};
let outputsRejecter: (error: unknown) => void = () => {};
const outputsPromise = new Promise<Buffer[]>((resolve, reject) => {
outputsResolver = resolve;
outputsRejecter = reject;
});
outputsPromise.catch(() => {});

const processResponses = async () => {
try {
for await (const response of streamRpc(request)) {
if (
response &&
typeof response === "object" &&
"type" in response &&
response.type === "generationStream"
) {
const parsed = generationStreamResponseSchema.parse(response);

if (parsed.step != null && parsed.totalSteps != null && parsed.elapsedMs != null) {
progressQueue.push({ step: parsed.step, totalSteps: parsed.totalSteps, elapsedMs: parsed.elapsedMs });
if (progressResolve) {
progressResolve();
progressResolve = null;
}
}

if (parsed.data) {
const outputEntry = { data: parsed.data, outputIndex: parsed.outputIndex ?? 0 };
outputQueue.push(outputEntry);
collectedBuffers.push(Buffer.from(parsed.data, "base64"));
if (outputResolve) {
outputResolve();
outputResolve = null;
}
}

if (parsed.done) {
statsResolver(parsed.stats);
outputsResolver(collectedBuffers);
}
}
}
} catch (error) {
streamError = error instanceof Error ? error : new Error(String(error));
statsRejecter(streamError);
outputsRejecter(streamError);
}

outputDone = true;
progressDone = true;
if (outputResolve) {
outputResolve();
outputResolve = null;
}
if (progressResolve) {
progressResolve();
progressResolve = null;
}
};

void processResponses();

const progressStream = (async function* (): AsyncGenerator<GenerationProgressTick> {
while (true) {
if (progressQueue.length > 0) {
yield progressQueue.shift()!;
} else if (progressDone) {
if (streamError) throw streamError;
return;
} else {
await new Promise<void>((resolve) => { progressResolve = resolve; });
}
}
})();

if (streaming) {
const outputStream = (async function* () {
while (true) {
if (outputQueue.length > 0) {
yield outputQueue.shift()!;
} else if (outputDone) {
if (streamError) throw streamError;
return;
} else {
await new Promise<void>((resolve) => { outputResolve = resolve; });
}
}
})();

return {
outputStream,
progressStream,
outputs: outputsPromise,
stats: statsPromise,
};
}

const outputStream = (async function* () {
// Empty generator for non-streaming mode
})();

return {
outputStream,
progressStream,
outputs: outputsPromise,
stats: statsPromise,
};
}
1 change: 1 addition & 0 deletions packages/sdk/client/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export { textToSpeech } from "./text-to-speech";
export { getModelInfo } from "./get-model-info";
export { ocr } from "./ocr";
export { invokePlugin, invokePluginStream } from "./invoke-plugin";
export { generation, type GenerationProgressTick } from "./generation";
export {
modelRegistryList,
modelRegistrySearch,
Expand Down
49 changes: 49 additions & 0 deletions packages/sdk/examples/diffusion-flux2-klein.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
import fs from "fs";
import path from "path";

// FLUX.2 [klein] uses a split-layout: separate diffusion model + LLM text encoder + VAE
const diffusionModelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;
const llmModelSrc = process.argv[3] || QWEN3_4B_Q4_K_M;
const vaeModelSrc = process.argv[4] || FLUX_2_KLEIN_4B_VAE;
const prompt = process.argv[5] || "a futuristic city at sunset, photorealistic";
const outputDir = process.argv[6] || ".";

console.log("Loading FLUX.2 [klein] split-layout model...");

const modelId = await loadModel({
modelSrc: diffusionModelSrc,
modelType: "diffusion",
modelConfig: {
device: "gpu",
threads: 4,
llmModelSrc,
vaeModelSrc,
},
onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
});
console.log(`Model loaded: ${modelId}`);

console.log(`\nGenerating: "${prompt}"`);

const { outputStream, stats } = generation({
modelId,
prompt,
width: 512,
height: 512,
steps: 20,
guidance: 3.5,
seed: -1,
stream: true,
});

for await (const { data, outputIndex } of outputStream) {
const outputPath = path.join(outputDir, `flux2_${outputIndex}.png`);
fs.writeFileSync(outputPath, Buffer.from(data, "base64"));
console.log(`Saved: ${outputPath}`);
}

console.log("\nStats:", await stats);
await unloadModel({ modelId, clearStorage: false });
console.log("Done.");
process.exit(0);
54 changes: 54 additions & 0 deletions packages/sdk/examples/diffusion-img2img.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
import fs from "fs";
import path from "path";

const modelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;
const inputImagePath = process.argv[3] || path.resolve("examples/image/test.jpg");

if (!fs.existsSync(inputImagePath)) {
console.error(`Input image not found: ${inputImagePath}`);
console.error(
"Usage: bun run examples/diffusion-img2img.ts [model-src] [input-image] [prompt] [strength] [output-dir]",
);
process.exit(1);
}

const prompt = process.argv[4] || "watercolor painting style";
const strength = parseFloat(process.argv[5] || "0.75");
const outputDir = process.argv[6] || ".";

console.log(`Loading diffusion model...`);
// FLUX.2 models require companion LLM + VAE models
const modelId = await loadModel({
modelSrc,
modelType: "diffusion",
modelConfig: { device: "gpu", threads: 4, llmModelSrc: QWEN3_4B_Q4_K_M, vaeModelSrc: FLUX_2_KLEIN_4B_VAE },
onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
});
console.log(`Model loaded: ${modelId}`);

console.log(`\nimg2img from: ${inputImagePath}`);
console.log(`Prompt: "${prompt}", strength: ${strength}`);

const { outputs, stats } = generation({
modelId,
prompt,
init_image: fs.readFileSync(inputImagePath),
strength,
width: 512,
height: 512,
steps: 20,
cfg_scale: 7.0,
});

const buffers = await outputs;
for (let i = 0; i < buffers.length; i++) {
const outputPath = path.join(outputDir, `img2img_${i}.png`);
fs.writeFileSync(outputPath, buffers[i]!);
console.log(`Saved: ${outputPath}`);
}

console.log("\nStats:", await stats);
await unloadModel({ modelId, clearStorage: false });
console.log("Done.");
process.exit(0);
44 changes: 44 additions & 0 deletions packages/sdk/examples/diffusion-txt2img.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { loadModel, unloadModel, generation, FLUX_2_KLEIN_4B_Q4_0, FLUX_2_KLEIN_4B_VAE, QWEN3_4B_Q4_K_M } from "@qvac/sdk";
import fs from "fs";
import path from "path";

const modelSrc = process.argv[2] || FLUX_2_KLEIN_4B_Q4_0;

const prompt =
process.argv[3] ||
"a photo of a cat sitting on a windowsill, golden hour lighting";
const outputDir = process.argv[4] || ".";

console.log(`Loading diffusion model...`);
// FLUX.2 models require companion LLM + VAE models
const modelId = await loadModel({
modelSrc,
modelType: "diffusion",
modelConfig: { device: "gpu", threads: 4, llmModelSrc: QWEN3_4B_Q4_K_M, vaeModelSrc: FLUX_2_KLEIN_4B_VAE },
onProgress: (p) => console.log(`Loading: ${p.percentage.toFixed(1)}%`),
});
console.log(`Model loaded: ${modelId}`);

console.log(`\nGenerating: "${prompt}"`);

const { outputStream, stats } = generation({
modelId,
prompt,
width: 512,
height: 512,
steps: 20,
cfg_scale: 7.0,
seed: -1,
stream: true,
});

for await (const { data, outputIndex } of outputStream) {
const outputPath = path.join(outputDir, `output_${outputIndex}.png`);
fs.writeFileSync(outputPath, Buffer.from(data, "base64"));
console.log(`Saved: ${outputPath}`);
}

console.log("\nStats:", await stats);
await unloadModel({ modelId, clearStorage: false });
console.log("Done.");
process.exit(0);
Loading
Loading