Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
0fa494e
feat[api]: add FLUX.2 multi-reference fusion and LoRA adapter support…
maxim-smotrov Apr 30, 2026
1eeab9a
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
c1af5b3
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
3203cd5
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
b2e9027
doc[skiplog]: trim verbose lora docs and prune zod-builtin tests
maxim-smotrov May 1, 2026
3afb686
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 1, 2026
30a07a9
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
fa3a516
test[api]: validate FLUX.2 fusion diverges from txt2img baseline and …
maxim-smotrov May 2, 2026
58702c2
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
bad99ea
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
b9267fc
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
9803db2
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
8e3bb82
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
22b8f46
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
bf375a5
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 4, 2026
cb91e51
Merge branch 'main' into feature/sdk-flux2-fusion-and-lora
maxim-smotrov May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion packages/sdk/client/api/diffusion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,38 @@ interface DiffusionResult {
* // modelConfig: { prediction: "flux2_flow" } })`).
* const { outputs } = diffusion({ modelId, prompt: "turn into watercolor", init_image: initImage });
*
* // FLUX.2 multi-reference fusion
* // IMPORTANT: requires the model to be loaded with `modelConfig: { prediction: "flux2_flow" }`
* // and a Qwen3 text encoder via `llmModelSrc` (same loadModel requirements as the
* // FLUX.2 img2img example above). `init_image` and `init_images` are mutually
* // exclusive — pass one or the other, not both.
* const refA = fs.readFileSync("scientist-a.jpg");
* const refB = fs.readFileSync("scientist-b.jpg");
* const { outputs } = diffusion({
* modelId,
* prompt: "a portrait using most visual traits from @image1 and the eyes from @image2",
* init_images: [refA, refB],
* width: 768,
* height: 768,
* });
*
* // LoRA adapter for this generation
* // Pass an absolute path to a local adapter you downloaded ahead of time.
* // Relative paths are rejected — the SDK runs across processes with
* // differing working directories, so an absolute path is required.
* // Whether the adapter persists across subsequent diffusion() calls is
* // controlled at loadModel time via `modelConfig.lora_apply_mode`.
* // Default is "auto", which delegates to stable-diffusion.cpp: it picks
* // "at_runtime" when the loaded model has quantized weights, and
* // "immediately" otherwise. See `sdcppConfigSchema.lora_apply_mode` for
* // the full description, and the @qvac/diffusion-cpp `LoraApplyMode`
* // type for the addon-level contract.
Comment thread
opaninakuffo marked this conversation as resolved.
Outdated
* const { outputs } = diffusion({
* modelId,
* prompt: "a watercolor cat",
* lora: "/home/user/loras/watercolor.safetensors",
* });
*
* // With progress tracking
* const { progressStream, outputs } = diffusion({ modelId, prompt: "a cat" });
* for await (const { step, totalSteps } of progressStream) {
Expand All @@ -53,10 +85,14 @@ interface DiffusionResult {
* ```
*/
export function diffusion(params: DiffusionClientParams): DiffusionResult {
const { init_image, ...rest } = params;
const { init_image, init_images, ...rest } = params;

const request: DiffusionStreamRequest = {
...rest,
...(init_image !== undefined && { init_image: encodeBase64(init_image) }),
...(init_images !== undefined && {
init_images: init_images.map(encodeBase64),
}),
type: "diffusionStream",
};

Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
},
"dependencies": {
"@qvac/decoder-audio": "^0.3.7",
"@qvac/diffusion-cpp": "^0.3.0",
"@qvac/diffusion-cpp": "^0.5.0",
"@qvac/embed-llamacpp": "^0.14.0",
"@qvac/error": "^0.1.1",
"@qvac/langdetect-text": "^0.1.2",
Expand Down
77 changes: 72 additions & 5 deletions packages/sdk/schemas/sdcpp-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import { modelSrcInputSchema } from "./model-src-utils";
// Padded base64 using the standard alphabet (A-Z, a-z, 0-9, "+", "/"):
// zero or more complete 4-character groups, optionally followed by one final
// group padded with "==" or "=". The URL-safe characters "-" and "_" are not
// accepted, and neither is an unpadded tail. The pattern on its own also
// matches the empty string, which is why the schema fields using it pair it
// with `.min(1)`.
const BASE64_PATTERN =
/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;

/**
 * Prefix check for absolute filesystem paths. A string passes when it starts
 * with one of:
 *   - "/"                        (POSIX root)
 *   - a drive letter, ":" and a slash or backslash ("C:\\", "c:/")
 *   - two backslashes            (UNC-style prefix, "\\\\server\\share")
 * Only the start of the string is inspected; drive-relative forms such as
 * "C:foo" and plain relative paths do not match.
 */
const ABSOLUTE_PATH_PATTERN = /^(?:\/|[A-Za-z]:[\\/]|\\\\)/;

export const sdcppConfigSchema = z
.object({
threads: z.number().optional(),
Expand All @@ -26,6 +28,20 @@ export const sdcppConfigSchema = z
vae_on_cpu: z.boolean().optional().describe("Force VAE decoder to run on CPU"),
vae_tiling: z.boolean().optional().describe("Enable VAE tiling for large images on limited VRAM"),
flash_attn: z.boolean().optional().describe("Enable flash attention to reduce memory usage"),
lora_apply_mode: z.enum(["auto", "immediately", "at_runtime"]).optional()
.describe(
"How LoRA adapters passed via diffusion({ lora }) are applied. " +
"'auto' (addon default): the underlying stable-diffusion.cpp library " +
"picks based on the loaded model's weight type — 'at_runtime' when " +
"any weights are quantized (q4_K, q5_K, q8_0, etc.), 'immediately' " +
"for full-precision weights (f16/f32/bf16). " +
"'immediately': adapter is fused into the model on first use and " +
"persists across subsequent diffusion() calls (even ones without " +
"lora) until the model is unloaded. " +
"'at_runtime': adapter is applied per-call and not persisted. " +
"See diffusion-cpp/index.d.ts (LoraApplyMode) and the upstream " +
"stable-diffusion.cpp `--lora-apply-mode` flag for the full contract.",
),
Comment thread
opaninakuffo marked this conversation as resolved.
verbosity: z.number().optional(),
clipLModelSrc: modelSrcInputSchema.optional()
.describe("CLIP-L text encoder model — required for SD3"),
Expand Down Expand Up @@ -210,7 +226,45 @@ export const diffusionRequestSchema = z.object({
.min(1)
.regex(BASE64_PATTERN)
.optional()
.describe("Base64-encoded image for img2img generation"),
.describe("Base64-encoded image for img2img generation. Mutually exclusive with init_images."),
init_images: z.array(
z.string().min(1).regex(BASE64_PATTERN),
)
.min(1)
.optional()
.describe(
"FLUX.2-only multi-reference fusion: array of base64-encoded PNG/JPEG buffers. " +
"Each buffer becomes a separate reference image that the FLUX.2 transformer attends to. " +
"Mutually exclusive with init_image; requires the model to be loaded with " +
"config.prediction='flux2_flow' and a Qwen3 text encoder via llmModelSrc.",
),
increase_ref_index: z.boolean().optional()
.describe(
"FLUX.2 fusion only. When omitted, the addon default (false) is used. When false, all " +
"reference latents share one RoPE index slot and blend via attention (recommended for " +
"FLUX.2-klein). When true, each reference gets its own RoPE index slot — use only with " +
"text encoders that receive per-image vision tokens.",
),
auto_resize_ref_image: z.boolean().optional()
.describe(
"FLUX.2 only. When omitted, the addon default (true) is used. When true, every reference " +
"image (single or fusion) is auto-resized to the target width/height before VAE-encoding. " +
"Disable only if the buffers are already at the exact target dimensions.",
),
lora: z
.string()
.min(1)
.regex(ABSOLUTE_PATH_PATTERN, {
message:
"lora must be an absolute path",
})
.optional()
.describe(
"Optional local LoRA adapter path to apply for this generation. " +
"Must be an absolute filesystem path. " +
"Whether the adapter persists across subsequent diffusion() calls is controlled " +
"by sdcppConfigSchema.lora_apply_mode (set at loadModel time).",
),
strength: z
.number()
.min(0)
Expand All @@ -219,7 +273,13 @@ export const diffusionRequestSchema = z.object({
.describe(
"img2img denoising strength (0.0 = keep source, 1.0 = ignore source); used by the SD/SDXL SDEdit path. No-op for FLUX.2, which uses in-context conditioning and ignores this field.",
),
});
}).refine(
(d) => d.init_image === undefined || d.init_images === undefined,
{
message:
"init_image and init_images are mutually exclusive — pass one or the other, not both.",
},
);

export type DiffusionRequest = z.input<typeof diffusionRequestSchema>;

Expand All @@ -231,6 +291,13 @@ export type DiffusionStreamRequest = z.input<
typeof diffusionStreamRequestSchema
>;

export type DiffusionClientParams = Omit<DiffusionRequest, "init_image"> & {
init_image?: Uint8Array;
};
/**
 * DiffusionRequest with both image inputs removed. The wire-level request
 * carries them as base64 strings; the client-facing type declared right after
 * this one re-adds them as raw bytes.
 */
type DiffusionClientParamsBase = Omit<
DiffusionRequest,
"init_image" | "init_images"
>;

/**
 * Parameters accepted by the client-side `diffusion()` helper.
 *
 * Image inputs are `Uint8Array` values here (the helper base64-encodes them
 * before building the stream request). The union has two arms, each typing
 * the other field as `never`, so a call that supplies both `init_image` and
 * `init_images` fails type checking — mirroring the mutual-exclusion
 * `refine` on `diffusionRequestSchema`. Omitting both fields is allowed.
 */
export type DiffusionClientParams = DiffusionClientParamsBase &
(
| { init_image?: Uint8Array; init_images?: never }
| { init_image?: never; init_images?: Uint8Array[] }
);
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ export async function* diffusion(
? Buffer.from(request.init_image, "base64")
: undefined;

const init_images = request.init_images
? request.init_images.map((b64) => Buffer.from(b64, "base64"))
: undefined;

const response = await model.run({
prompt: request.prompt,
negative_prompt: request.negative_prompt,
Expand All @@ -34,7 +38,11 @@ export async function* diffusion(
vae_tiling: request.vae_tiling,
cache_preset: request.cache_preset,
init_image,
init_images,
increase_ref_index: request.increase_ref_index,
auto_resize_ref_image: request.auto_resize_ref_image,
strength: request.strength,
lora: request.lora,
});

let outputIndex = 0;
Expand Down
Loading
Loading