tetherto · maxim-smotrov · May 5, 2026 · Apr 30, 2026 · May 1, 2026 · May 1, 2026
@@ -44,6 +44,30 @@ interface DiffusionResult {
  * // modelConfig: { prediction: "flux2_flow" } })`).
  * const { outputs } = diffusion({ modelId, prompt: "turn into watercolor", init_image: initImage });
  *
+ * // FLUX.2 multi-reference fusion
+ * // IMPORTANT: requires the model to be loaded with `modelConfig: { prediction: "flux2_flow" }`
+ * // and a Qwen3 text encoder via `llmModelSrc` (same loadModel requirements as the
+ * // FLUX.2 img2img example above). `init_image` and `init_images` are mutually
+ * // exclusive — pass one or the other, not both.
+ * const refA = fs.readFileSync("scientist-a.jpg");
+ * const refB = fs.readFileSync("scientist-b.jpg");
+ * const { outputs } = diffusion({
+ *   modelId,
+ *   prompt: "a portrait using most visual traits from @image1 and the eyes from @image2",
+ *   init_images: [refA, refB],
+ *   width: 768,
+ *   height: 768,
+ * });
+ *
+ * // LoRA adapter for this generation (absolute path required).
+ * // Persistence across subsequent diffusion() calls is controlled at
+ * // loadModel time via `modelConfig.lora_apply_mode`.
+ * const { outputs } = diffusion({
+ *   modelId,
+ *   prompt: "a watercolor cat",
+ *   lora: "/home/user/loras/watercolor.safetensors",
+ * });
+ *
  * // With progress tracking
  * const { progressStream, outputs } = diffusion({ modelId, prompt: "a cat" });
  * for await (const { step, totalSteps } of progressStream) {
@@ -53,10 +77,14 @@ interface DiffusionResult {
  * ```
  */
 export function diffusion(params: DiffusionClientParams): DiffusionResult {
-  const { init_image, ...rest } = params;
+  const { init_image, init_images, ...rest } = params;
+
   const request: DiffusionStreamRequest = {
     ...rest,
     ...(init_image !== undefined && { init_image: encodeBase64(init_image) }),
+    ...(init_images !== undefined && {
+      init_images: init_images.map(encodeBase64),
+    }),
     type: "diffusionStream",
   };
 

@@ -173,7 +173,7 @@
   },
   "dependencies": {
     "@qvac/decoder-audio": "^0.3.7",
-    "@qvac/diffusion-cpp": "^0.3.0",
+    "@qvac/diffusion-cpp": "^0.5.0",
     "@qvac/embed-llamacpp": "^0.14.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text": "^0.1.2",

@@ -4,6 +4,8 @@ import { modelSrcInputSchema } from "./model-src-utils";
 const BASE64_PATTERN =
   /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;
 
+const ABSOLUTE_PATH_PATTERN = /^(\/|[A-Za-z]:[\\/]|\\\\)/; // absolute-path prefixes: "/", a drive letter + ":" + "\" or "/", or a leading "\\"
+
 export const sdcppConfigSchema = z
   .object({
     threads: z.number().optional(),
@@ -26,6 +28,16 @@ export const sdcppConfigSchema = z
     vae_on_cpu: z.boolean().optional().describe("Force VAE decoder to run on CPU"),
     vae_tiling: z.boolean().optional().describe("Enable VAE tiling for large images on limited VRAM"),
     flash_attn: z.boolean().optional().describe("Enable flash attention to reduce memory usage"),
+    lora_apply_mode: z.enum(["auto", "immediately", "at_runtime"]).optional()
+      .describe(
+        "How LoRA adapters passed via diffusion({ lora }) are applied. " +
+        "'auto' (default): picked based on weight type — 'at_runtime' for " +
+        "quantized weights, 'immediately' for full-precision. " +
+        "'immediately': adapter is fused into the model on first use and " +
+        "persists across subsequent diffusion() calls until the model is " +
+        "unloaded. " +
+        "'at_runtime': adapter is applied per-call and not persisted.",
+      ),
     verbosity: z.number().optional(),
     clipLModelSrc: modelSrcInputSchema.optional()
       .describe("CLIP-L text encoder model — required for SD3"),
@@ -210,7 +222,45 @@ export const diffusionRequestSchema = z.object({
     .min(1)
     .regex(BASE64_PATTERN)
     .optional()
-    .describe("Base64-encoded image for img2img generation"),
+    .describe("Base64-encoded image for img2img generation. Mutually exclusive with init_images."),
+  init_images: z.array(
+    z.string().min(1).regex(BASE64_PATTERN),
+  )
+    .min(1)
+    .optional()
+    .describe(
+      "FLUX.2-only multi-reference fusion: array of base64-encoded PNG/JPEG buffers. " +
+      "Each buffer becomes a separate reference image that the FLUX.2 transformer attends to. " +
+      "Mutually exclusive with init_image; requires the model to be loaded with " +
+      "config.prediction='flux2_flow' and a Qwen3 text encoder via llmModelSrc.",
+    ),
+  increase_ref_index: z.boolean().optional()
+    .describe(
+      "FLUX.2 fusion only. When omitted, the addon default (false) is used. When false, all " +
+      "reference latents share one RoPE index slot and blend via attention (recommended for " +
+      "FLUX.2-klein). When true, each reference gets its own RoPE index slot — use only with " +
+      "text encoders that receive per-image vision tokens.",
+    ),
+  auto_resize_ref_image: z.boolean().optional()
+    .describe(
+      "FLUX.2 only. When omitted, the addon default (true) is used. When true, every reference " +
+      "image (single or fusion) is auto-resized to the target width/height before VAE-encoding. " +
+      "Disable only if the buffers are already at the exact target dimensions.",
+    ),
+  lora: z
+    .string()
+    .min(1)
+    .regex(ABSOLUTE_PATH_PATTERN, {
+      message:
+        "lora must be an absolute path",
+    })
+    .optional()
+    .describe(
+      "Optional local LoRA adapter path to apply for this generation. " +
+      "Must be an absolute filesystem path. " +
+      "Whether the adapter persists across subsequent diffusion() calls is controlled " +
+      "by sdcppConfigSchema.lora_apply_mode (set at loadModel time).",
+    ),
   strength: z
     .number()
     .min(0)
@@ -219,7 +269,13 @@ export const diffusionRequestSchema = z.object({
     .describe(
       "img2img denoising strength (0.0 = keep source, 1.0 = ignore source); used by the SD/SDXL SDEdit path. No-op for FLUX.2, which uses in-context conditioning and ignores this field.",
     ),
-});
+}).refine(
+  (d) => d.init_image === undefined || d.init_images === undefined,
+  {
+    message:
+      "init_image and init_images are mutually exclusive — pass one or the other, not both.",
+  },
+);
 
 export type DiffusionRequest = z.input<typeof diffusionRequestSchema>;
 
@@ -231,6 +287,13 @@ export type DiffusionStreamRequest = z.input<
   typeof diffusionStreamRequestSchema
 >;
 
-export type DiffusionClientParams = Omit<DiffusionRequest, "init_image"> & {
-  init_image?: Uint8Array;
-};
+type DiffusionClientParamsBase = Omit< // request input type with both image fields removed
+  DiffusionRequest,
+  "init_image" | "init_images"
+>;
+
+export type DiffusionClientParams = DiffusionClientParamsBase & // re-adds the image fields as Uint8Array bytes, as an either/or union
+  (
+    | { init_image?: Uint8Array; init_images?: never } // single-image form: init_images may not be supplied
+    | { init_image?: never; init_images?: Uint8Array[] } // multi-image form: init_image may not be supplied
+  );
@@ -18,6 +18,10 @@ export async function* diffusion(
     ? Buffer.from(request.init_image, "base64")
     : undefined;
 
+  const init_images = request.init_images
+    ? request.init_images.map((b64) => Buffer.from(b64, "base64"))
+    : undefined;
+
   const response = await model.run({
     prompt: request.prompt,
     negative_prompt: request.negative_prompt,
@@ -34,7 +38,11 @@ export async function* diffusion(
     vae_tiling: request.vae_tiling,
     cache_preset: request.cache_preset,
     init_image,
+    init_images,
+    increase_ref_index: request.increase_ref_index,
+    auto_resize_ref_image: request.auto_resize_ref_image,
     strength: request.strength,
+    lora: request.lora,
   });
 
   let outputIndex = 0;

@@ -4,6 +4,7 @@ import { z } from "zod";
 import {
   sdcppConfigSchema,
   diffusionRequestSchema,
+  diffusionStreamRequestSchema,
   diffusionStreamResponseSchema,
   diffusionStatsSchema,
   modelInfoSchema,
@@ -320,6 +321,180 @@ test("diffusionRequestSchema: accepts strength at boundaries (0 and 1)", (t) =>
   t.is(resultOne.success, true);
 });
 
+// ---- LoRA + multi-reference fusion (FLUX.2) ----
+
+test("diffusionRequestSchema: accepts lora as POSIX absolute path", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "/home/user/loras/watercolor.safetensors", // leading "/" is the POSIX alternative of the schema's absolute-path regex
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: accepts lora as Windows drive-letter path", (t) => {
+  const resultBackslash = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "C:\\models\\loras\\watercolor.safetensors", // drive letter with backslash separators (escaped in the literal)
+  });
+  t.is(resultBackslash.success, true);
+
+  const resultForwardSlash = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "C:/models/loras/watercolor.safetensors", // same drive-letter form written with forward slashes
+  });
+  t.is(resultForwardSlash.success, true);
+});
+
+test("diffusionRequestSchema: accepts lora as Windows UNC path", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "\\\\server\\share\\loras\\watercolor.safetensors", // literal unescapes to a value starting with two backslashes
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: rejects lora as bare filename", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "my-lora.safetensors", // no leading "/", drive letter, or double backslash
+  });
+  t.is(result.success, false);
+  if (!result.success) { // narrows the safeParse result so result.error can be read
+    t.ok(
+      /must be an absolute path/.test(result.error.issues[0]!.message), // assumes the lora issue is the first one reported
+      "error message explains lora must be absolute",
+    );
+  }
+});
+
+test("diffusionRequestSchema: rejects lora as relative path", (t) => {
+  const resultDot = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "./loras/watercolor.safetensors", // current-directory relative form
+  });
+  t.is(resultDot.success, false);
+
+  const resultParent = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "../loras/watercolor.safetensors", // parent-directory relative form
+  });
+  t.is(resultParent.success, false);
+
+  const resultSubdir = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    lora: "loras/watercolor.safetensors", // subdirectory path without any root prefix
+  });
+  t.is(resultSubdir.success, false);
+});
+
+test("diffusionRequestSchema: accepts init_images with multiple base64 buffers", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "blend @image1 and @image2",
+    init_images: ["iVBORw0KGgoAAAANSUhEUg==", "/9j/4AAQSkZJRgABAQEASABIAAA="], // two non-empty, correctly padded base64 strings
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: accepts increase_ref_index boolean", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    init_images: ["iVBORw0KGgoAAAANSUhEUg=="],
+    increase_ref_index: true, // optional boolean flag, supplied here alongside a one-element init_images
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: accepts auto_resize_ref_image boolean", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "a cat",
+    init_image: "iVBORw0KGgoAAAANSUhEUg==",
+    auto_resize_ref_image: false, // optional boolean flag, supplied here alongside a single init_image
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: rejects when init_image and init_images are both set", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "blend two refs",
+    init_image: "iVBORw0KGgoAAAANSUhEUg==",
+    init_images: ["iVBORw0KGgoAAAANSUhEUg==", "/9j/4AAQSkZJRgABAQEASABIAAA="], // both fields are individually valid; only the combination is wrong
+  });
+  t.is(result.success, false);
+  if (!result.success) { // narrows the safeParse result so result.error can be read
+    const messages = result.error.issues.map((i) => i.message).join(" | "); // join every issue message so the check does not depend on issue order
+    t.ok(
+      messages.includes("mutually exclusive"), // substring of the message attached to the schema's refine
+      `expected mutual-exclusion message, got: ${messages}`,
+    );
+  }
+});
+
+test("diffusionRequestSchema: accepts init_image alone (mutual exclusion not triggered)", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "img2img",
+    init_image: "iVBORw0KGgoAAAANSUhEUg==", // init_images is left undefined, so the refine predicate holds
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionRequestSchema: accepts init_images alone (mutual exclusion not triggered)", (t) => {
+  const result = diffusionRequestSchema.safeParse({
+    modelId: "model-1",
+    prompt: "fusion",
+    init_images: ["iVBORw0KGgoAAAANSUhEUg=="], // init_image is left undefined, so the refine predicate holds
+  });
+  t.is(result.success, true);
+});
+
+// ---- diffusionStreamRequestSchema ----
+
+test("diffusionStreamRequestSchema: accepts a valid stream request with type literal", (t) => {
+  const result = diffusionStreamRequestSchema.safeParse({
+    type: "diffusionStream", // same literal the client-side diffusion() sets on the request it builds
+    modelId: "model-1",
+    prompt: "a cat",
+  });
+  t.is(result.success, true);
+});
+
+test("diffusionStreamRequestSchema: rejects when init_image and init_images are both set", (t) => {
+  const result = diffusionStreamRequestSchema.safeParse({ // NOTE(review): presumably the stream schema reuses diffusionRequestSchema's exclusivity check — its definition is not visible here
+    type: "diffusionStream",
+    modelId: "model-1",
+    prompt: "a cat",
+    init_image: "iVBORw0KGgoAAAANSUhEUg==",
+    init_images: ["iVBORw0KGgoAAAANSUhEUg=="],
+  });
+  t.is(result.success, false);
+  if (!result.success) { // narrows the safeParse result so result.error can be read
+    const messages = result.error.issues.map((i) => i.message).join(" | "); // join every issue message so the check does not depend on issue order
+    t.ok(
+      messages.includes("mutually exclusive"),
+      `expected mutual-exclusion message, got: ${messages}`,
+    );
+  }
+});
+
+// ---- sdcppConfigSchema: lora_apply_mode ----
+
+test("sdcppConfigSchema: accepts lora_apply_mode", (t) => {
+  const result = sdcppConfigSchema.safeParse({ lora_apply_mode: "auto" }); // "auto" is one of the three values in the schema's enum
+  t.is(result.success, true);
+});
+
 // ============================================
 // diffusionStreamResponseSchema
 // ============================================