Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
0fa494e
feat[api]: add FLUX.2 multi-reference fusion and LoRA adapter support…
maxim-smotrov Apr 30, 2026
1eeab9a
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
c1af5b3
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
3203cd5
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
b2e9027
doc[skiplog]: trim verbose lora docs and prune zod-builtin tests
maxim-smotrov May 1, 2026
3afb686
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
30a07a9
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
fa3a516
test[api]: validate FLUX.2 fusion diverges from txt2img baseline and …
maxim-smotrov May 2, 2026
58702c2
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
bad99ea
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
b9267fc
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
9803db2
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
8e3bb82
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
22b8f46
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
bf375a5
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
cb91e51
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion packages/sdk/client/api/diffusion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,30 @@ interface DiffusionResult {
* // modelConfig: { prediction: "flux2_flow" } })`).
* const { outputs } = diffusion({ modelId, prompt: "turn into watercolor", init_image: initImage });
*
* // FLUX.2 multi-reference fusion
* // IMPORTANT: requires the model loaded with `modelConfig: { prediction: "flux2_flow" }`
* // and a Qwen3 text encoder via `llmModelSrc` (same loadModel requirements as the
* // FLUX.2 img2img example above). `init_image` and `init_images` are mutually
* // exclusive — pass one or the other, not both.
* const refA = fs.readFileSync("scientist-a.jpg");
* const refB = fs.readFileSync("scientist-b.jpg");
* const { outputs } = diffusion({
* modelId,
* prompt: "a portrait using most visual traits from @image1 and the eyes from @image2",
* init_images: [refA, refB],
* width: 768,
* height: 768,
* });
*
* // LoRA adapter for this generation (absolute path required).
* // Persistence across subsequent diffusion() calls is controlled at
* // loadModel time via `modelConfig.lora_apply_mode`.
* const { outputs } = diffusion({
* modelId,
* prompt: "a watercolor cat",
* lora: "/home/user/loras/watercolor.safetensors",
* });
*
* // With progress tracking
* const { progressStream, outputs } = diffusion({ modelId, prompt: "a cat" });
* for await (const { step, totalSteps } of progressStream) {
Expand All @@ -53,10 +77,14 @@ interface DiffusionResult {
* ```
*/
export function diffusion(params: DiffusionClientParams): DiffusionResult {
const { init_image, ...rest } = params;
const { init_image, init_images, ...rest } = params;

const request: DiffusionStreamRequest = {
...rest,
...(init_image !== undefined && { init_image: encodeBase64(init_image) }),
...(init_images !== undefined && {
init_images: init_images.map(encodeBase64),
}),
type: "diffusionStream",
};

Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
},
"dependencies": {
"@qvac/decoder-audio": "^0.3.7",
"@qvac/diffusion-cpp": "^0.3.0",
"@qvac/diffusion-cpp": "^0.5.0",
"@qvac/embed-llamacpp": "^0.14.0",
"@qvac/error": "^0.1.1",
"@qvac/langdetect-text": "^0.1.2",
Expand Down
73 changes: 68 additions & 5 deletions packages/sdk/schemas/sdcpp-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import { modelSrcInputSchema } from "./model-src-utils";
// Matches padded base64 text: zero or more 4-character groups from the
// standard alphabet (A-Z, a-z, 0-9, "+", "/"), optionally followed by one
// final group padded with "==" or "=". The pattern on its own also matches
// the empty string.
const BASE64_PATTERN =
/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;

// Matches strings that START with an absolute-path prefix:
//   - "/"            POSIX root
//   - "X:\" or "X:/" Windows drive letter followed by either separator
//   - "\\"           Windows UNC prefix
// Only the prefix is checked; the remainder of the string is not inspected.
const ABSOLUTE_PATH_PATTERN = /^(\/|[A-Za-z]:[\\/]|\\\\)/;

export const sdcppConfigSchema = z
.object({
threads: z.number().optional(),
Expand All @@ -26,6 +28,16 @@ export const sdcppConfigSchema = z
vae_on_cpu: z.boolean().optional().describe("Force VAE decoder to run on CPU"),
vae_tiling: z.boolean().optional().describe("Enable VAE tiling for large images on limited VRAM"),
flash_attn: z.boolean().optional().describe("Enable flash attention to reduce memory usage"),
lora_apply_mode: z.enum(["auto", "immediately", "at_runtime"]).optional()
.describe(
"How LoRA adapters passed via diffusion({ lora }) are applied. " +
"'auto' (default): picked based on weight type — 'at_runtime' for " +
"quantized weights, 'immediately' for full-precision. " +
"'immediately': adapter is fused into the model on first use and " +
"persists across subsequent diffusion() calls until the model is " +
"unloaded. " +
"'at_runtime': adapter is applied per-call and not persisted.",
),
Comment thread
opaninakuffo marked this conversation as resolved.
verbosity: z.number().optional(),
clipLModelSrc: modelSrcInputSchema.optional()
.describe("CLIP-L text encoder model — required for SD3"),
Expand Down Expand Up @@ -210,7 +222,45 @@ export const diffusionRequestSchema = z.object({
.min(1)
.regex(BASE64_PATTERN)
.optional()
.describe("Base64-encoded image for img2img generation"),
.describe("Base64-encoded image for img2img generation. Mutually exclusive with init_images."),
init_images: z.array(
z.string().min(1).regex(BASE64_PATTERN),
)
.min(1)
.optional()
.describe(
"FLUX.2-only multi-reference fusion: array of base64-encoded PNG/JPEG buffers. " +
"Each buffer becomes a separate reference image that the FLUX.2 transformer attends to. " +
"Mutually exclusive with init_image; requires the model to be loaded with " +
"config.prediction='flux2_flow' and a Qwen3 text encoder via llmModelSrc.",
),
increase_ref_index: z.boolean().optional()
.describe(
"FLUX.2 fusion only. When omitted, the addon default (false) is used. When false, all " +
"reference latents share one RoPE index slot and blend via attention (recommended for " +
"FLUX.2-klein). When true, each reference gets its own RoPE index slot — use only with " +
"text encoders that receive per-image vision tokens.",
),
auto_resize_ref_image: z.boolean().optional()
.describe(
"FLUX.2 only. When omitted, the addon default (true) is used. When true, every reference " +
"image (single or fusion) is auto-resized to the target width/height before VAE-encoding. " +
"Disable only if the buffers are already at the exact target dimensions.",
),
lora: z
.string()
.min(1)
.regex(ABSOLUTE_PATH_PATTERN, {
message:
"lora must be an absolute path",
})
.optional()
.describe(
"Optional local LoRA adapter path to apply for this generation. " +
"Must be an absolute filesystem path. " +
"Whether the adapter persists across subsequent diffusion() calls is controlled " +
"by sdcppConfigSchema.lora_apply_mode (set at loadModel time).",
),
strength: z
.number()
.min(0)
Expand All @@ -219,7 +269,13 @@ export const diffusionRequestSchema = z.object({
.describe(
"img2img denoising strength (0.0 = keep source, 1.0 = ignore source); used by the SD/SDXL SDEdit path. No-op for FLUX.2, which uses in-context conditioning and ignores this field.",
),
});
}).refine(
(d) => d.init_image === undefined || d.init_images === undefined,
{
message:
"init_image and init_images are mutually exclusive — pass one or the other, not both.",
},
);

/** Input (pre-parse) type of `diffusionRequestSchema`, derived with `z.input`. */
export type DiffusionRequest = z.input<typeof diffusionRequestSchema>;

Expand All @@ -231,6 +287,13 @@ export type DiffusionStreamRequest = z.input<
typeof diffusionStreamRequestSchema
>;

export type DiffusionClientParams = Omit<DiffusionRequest, "init_image"> & {
init_image?: Uint8Array;
};
// Request fields with both image inputs removed, so they can be re-declared
// below using `Uint8Array` instead of the types carried by `DiffusionRequest`.
type DiffusionClientParamsBase = Omit<
DiffusionRequest,
"init_image" | "init_images"
>;

/**
 * `DiffusionRequest` with the image inputs re-typed as raw `Uint8Array` data.
 *
 * The union makes `init_image` and `init_images` mutually exclusive at the
 * type level: each branch types the other field as an optional `never`, so an
 * object that supplies values for both fields matches neither branch.
 * Both fields remain optional, so omitting them entirely is still allowed.
 */
export type DiffusionClientParams = DiffusionClientParamsBase &
(
| { init_image?: Uint8Array; init_images?: never }
| { init_image?: never; init_images?: Uint8Array[] }
);
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ export async function* diffusion(
? Buffer.from(request.init_image, "base64")
: undefined;

const init_images = request.init_images
? request.init_images.map((b64) => Buffer.from(b64, "base64"))
: undefined;

const response = await model.run({
prompt: request.prompt,
negative_prompt: request.negative_prompt,
Expand All @@ -34,7 +38,11 @@ export async function* diffusion(
vae_tiling: request.vae_tiling,
cache_preset: request.cache_preset,
init_image,
init_images,
increase_ref_index: request.increase_ref_index,
auto_resize_ref_image: request.auto_resize_ref_image,
strength: request.strength,
lora: request.lora,
});

let outputIndex = 0;
Expand Down
175 changes: 175 additions & 0 deletions packages/sdk/test/unit/sdcpp-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { z } from "zod";
import {
sdcppConfigSchema,
diffusionRequestSchema,
diffusionStreamRequestSchema,
diffusionStreamResponseSchema,
diffusionStatsSchema,
modelInfoSchema,
Expand Down Expand Up @@ -320,6 +321,180 @@ test("diffusionRequestSchema: accepts strength at boundaries (0 and 1)", (t) =>
t.is(resultOne.success, true);
});

// ---- LoRA + multi-reference fusion (FLUX.2) ----

// A `lora` value rooted at "/" must pass schema validation.
test("diffusionRequestSchema: accepts lora as POSIX absolute path", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "a cat",
    lora: "/home/user/loras/watercolor.safetensors",
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// Drive-letter paths must validate with either separator style, so both the
// backslash and the forward-slash spelling are checked.
test("diffusionRequestSchema: accepts lora as Windows drive-letter path", (t) => {
  const resultBackslash = diffusionRequestSchema.safeParse({
    modelId: "model-1",
    prompt: "a cat",
    lora: "C:\\models\\loras\\watercolor.safetensors",
  });
  t.is(resultBackslash.success, true);

  const resultForwardSlash = diffusionRequestSchema.safeParse({
    modelId: "model-1",
    prompt: "a cat",
    lora: "C:/models/loras/watercolor.safetensors",
  });
  t.is(resultForwardSlash.success, true);
});

// A `lora` value beginning with the UNC prefix ("\\") must pass validation.
test("diffusionRequestSchema: accepts lora as Windows UNC path", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "a cat",
    lora: "\\\\server\\share\\loras\\watercolor.safetensors",
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// A bare filename has no absolute-path prefix, so parsing must fail and the
// reported issues must include the "absolute path" explanation.
test("diffusionRequestSchema: rejects lora as bare filename", (t) => {
  const result = diffusionRequestSchema.safeParse({
    modelId: "model-1",
    prompt: "a cat",
    lora: "my-lora.safetensors",
  });
  t.is(result.success, false);
  if (!result.success) {
    // Match against every reported issue instead of assuming the lora issue is
    // first; this avoids a non-null assertion on `issues[0]`.
    const messages = result.error.issues.map((i) => i.message).join(" | ");
    t.ok(
      /must be an absolute path/.test(messages),
      "error message explains lora must be absolute",
    );
  }
});

// Relative spellings ("./", "../", plain sub-directory) must each be rejected.
test("diffusionRequestSchema: rejects lora as relative path", (t) => {
  const relativePaths = [
    "./loras/watercolor.safetensors",
    "../loras/watercolor.safetensors",
    "loras/watercolor.safetensors",
  ];

  for (const lora of relativePaths) {
    const outcome = diffusionRequestSchema.safeParse({
      modelId: "model-1",
      prompt: "a cat",
      lora,
    });
    t.is(outcome.success, false);
  }
});

// Two base64 strings in `init_images` must pass validation.
test("diffusionRequestSchema: accepts init_images with multiple base64 buffers", (t) => {
  const references = ["iVBORw0KGgoAAAANSUhEUg==", "/9j/4AAQSkZJRgABAQEASABIAAA="];

  const { success } = diffusionRequestSchema.safeParse({
    modelId: "model-1",
    prompt: "blend @image1 and @image2",
    init_images: references,
  });

  t.is(success, true);
});

// `increase_ref_index: true` alongside a single reference image must validate.
test("diffusionRequestSchema: accepts increase_ref_index boolean", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "a cat",
    init_images: ["iVBORw0KGgoAAAANSUhEUg=="],
    increase_ref_index: true,
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// `auto_resize_ref_image: false` alongside `init_image` must validate.
test("diffusionRequestSchema: accepts auto_resize_ref_image boolean", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "a cat",
    init_image: "iVBORw0KGgoAAAANSUhEUg==",
    auto_resize_ref_image: false,
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// Supplying `init_image` together with `init_images` must fail, and the
// reported issues must carry the mutual-exclusion message.
test("diffusionRequestSchema: rejects when init_image and init_images are both set", (t) => {
  const conflicting = {
    modelId: "model-1",
    prompt: "blend two refs",
    init_image: "iVBORw0KGgoAAAANSUhEUg==",
    init_images: ["iVBORw0KGgoAAAANSUhEUg==", "/9j/4AAQSkZJRgABAQEASABIAAA="],
  };
  const result = diffusionRequestSchema.safeParse(conflicting);

  t.is(result.success, false);
  if (result.success) {
    return;
  }

  const messages = result.error.issues.map((i) => i.message).join(" | ");
  const mentionsExclusion = messages.includes("mutually exclusive");
  t.ok(mentionsExclusion, `expected mutual-exclusion message, got: ${messages}`);
});

// `init_image` on its own must not trip the mutual-exclusion check.
test("diffusionRequestSchema: accepts init_image alone (mutual exclusion not triggered)", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "img2img",
    init_image: "iVBORw0KGgoAAAANSUhEUg==",
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// `init_images` on its own must not trip the mutual-exclusion check.
test("diffusionRequestSchema: accepts init_images alone (mutual exclusion not triggered)", (t) => {
  const payload = {
    modelId: "model-1",
    prompt: "fusion",
    init_images: ["iVBORw0KGgoAAAANSUhEUg=="],
  };

  const { success } = diffusionRequestSchema.safeParse(payload);

  t.is(success, true);
});

// ---- diffusionStreamRequestSchema ----

// A minimal stream request (type literal, modelId, prompt) must validate.
test("diffusionStreamRequestSchema: accepts a valid stream request with type literal", (t) => {
  const result = diffusionStreamRequestSchema.safeParse({
    type: "diffusionStream",
    modelId: "model-1",
    prompt: "a cat",
  });
  t.is(result.success, true);
});

// The stream request schema must also reject a payload that sets both
// `init_image` and `init_images`, with the mutual-exclusion message.
test("diffusionStreamRequestSchema: rejects when init_image and init_images are both set", (t) => {
  const conflicting = {
    type: "diffusionStream",
    modelId: "model-1",
    prompt: "a cat",
    init_image: "iVBORw0KGgoAAAANSUhEUg==",
    init_images: ["iVBORw0KGgoAAAANSUhEUg=="],
  };
  const result = diffusionStreamRequestSchema.safeParse(conflicting);

  t.is(result.success, false);
  if (result.success) {
    return;
  }

  const messages = result.error.issues.map((i) => i.message).join(" | ");
  const mentionsExclusion = messages.includes("mutually exclusive");
  t.ok(mentionsExclusion, `expected mutual-exclusion message, got: ${messages}`);
});

// ---- sdcppConfigSchema: lora_apply_mode ----

// The "auto" value of `lora_apply_mode` must be accepted by the config schema.
test("sdcppConfigSchema: accepts lora_apply_mode", (t) => {
  const config = { lora_apply_mode: "auto" };

  const { success } = sdcppConfigSchema.safeParse(config);

  t.is(success, true);
});

// ============================================
// diffusionStreamResponseSchema
// ============================================
Expand Down
Loading
Loading