tetherto · maxim-smotrov · May 5, 2026 · Apr 30, 2026 · May 1, 2026 · May 1, 2026
@@ -44,6 +44,38 @@ interface DiffusionResult {
  * // modelConfig: { prediction: "flux2_flow" } })`).
  * const { outputs } = diffusion({ modelId, prompt: "turn into watercolor", init_image: initImage });
  *
+ * // FLUX.2 multi-reference fusion
+ * // IMPORTANT: requires the model to be loaded with `modelConfig: { prediction: "flux2_flow" }`
+ * // and a Qwen3 text encoder via `llmModelSrc` (same loadModel requirements as the
+ * // FLUX.2 img2img example above). `init_image` and `init_images` are mutually
+ * // exclusive — pass one or the other, not both.
+ * const refA = fs.readFileSync("scientist-a.jpg");
+ * const refB = fs.readFileSync("scientist-b.jpg");
+ * const { outputs } = diffusion({
+ *   modelId,
+ *   prompt: "a portrait using most visual traits from @image1 and the eyes from @image2",
+ *   init_images: [refA, refB],
+ *   width: 768,
+ *   height: 768,
+ * });
+ *
+ * // LoRA adapter for this generation
+ * // Pass an absolute path to a local adapter you downloaded ahead of time.
+ * // Relative paths are rejected — the SDK runs across processes with
+ * // differing working directories, so an absolute path is required.
+ * // Whether the adapter persists across subsequent diffusion() calls is
+ * // controlled at loadModel time via `modelConfig.lora_apply_mode`.
+ * // Default is "auto", which delegates to stable-diffusion.cpp: it picks
+ * // "at_runtime" when the loaded model has quantized weights, and
+ * // "immediately" otherwise. See `sdcppConfigSchema.lora_apply_mode` for
+ * // the full description, and the @qvac/diffusion-cpp `LoraApplyMode`
+ * // type for the addon-level contract.
+ * const { outputs } = diffusion({
+ *   modelId,
+ *   prompt: "a watercolor cat",
+ *   lora: "/home/user/loras/watercolor.safetensors",
+ * });
+ *
  * // With progress tracking
  * const { progressStream, outputs } = diffusion({ modelId, prompt: "a cat" });
  * for await (const { step, totalSteps } of progressStream) {
@@ -53,10 +85,14 @@ interface DiffusionResult {
  * ```
  */
 export function diffusion(params: DiffusionClientParams): DiffusionResult {
-  const { init_image, ...rest } = params;
+  const { init_image, init_images, ...rest } = params;
+
   const request: DiffusionStreamRequest = {
     ...rest,
     ...(init_image !== undefined && { init_image: encodeBase64(init_image) }),
+    ...(init_images !== undefined && {
+      init_images: init_images.map(encodeBase64),
+    }),
     type: "diffusionStream",
   };
 

@@ -173,7 +173,7 @@
   },
   "dependencies": {
     "@qvac/decoder-audio": "^0.3.7",
-    "@qvac/diffusion-cpp": "^0.3.0",
+    "@qvac/diffusion-cpp": "^0.5.0",
     "@qvac/embed-llamacpp": "^0.14.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text": "^0.1.2",

@@ -4,6 +4,8 @@ import { modelSrcInputSchema } from "./model-src-utils";
 const BASE64_PATTERN =
   /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;
 
+const ABSOLUTE_PATH_PATTERN = /^(\/|[A-Za-z]:[\\/]|\\\\)/; // prefix-only check: a leading "/", a drive letter + ":" + "/" or "\", or a leading "\\"
+
 export const sdcppConfigSchema = z
   .object({
     threads: z.number().optional(),
@@ -26,6 +28,20 @@ export const sdcppConfigSchema = z
     vae_on_cpu: z.boolean().optional().describe("Force VAE decoder to run on CPU"),
     vae_tiling: z.boolean().optional().describe("Enable VAE tiling for large images on limited VRAM"),
     flash_attn: z.boolean().optional().describe("Enable flash attention to reduce memory usage"),
+    lora_apply_mode: z.enum(["auto", "immediately", "at_runtime"]).optional()
+      .describe(
+        "How LoRA adapters passed via diffusion({ lora }) are applied. " +
+        "'auto' (addon default): the underlying stable-diffusion.cpp library " +
+        "picks based on the loaded model's weight type — 'at_runtime' when " +
+        "any weights are quantized (q4_K, q5_K, q8_0, etc.), 'immediately' " +
+        "for full-precision weights (f16/f32/bf16). " +
+        "'immediately': adapter is fused into the model on first use and " +
+        "persists across subsequent diffusion() calls (even ones without " +
+        "lora) until the model is unloaded. " +
+        "'at_runtime': adapter is applied per-call and not persisted. " +
+        "See diffusion-cpp/index.d.ts (LoraApplyMode) and the upstream " +
+        "stable-diffusion.cpp `--lora-apply-mode` flag for the full contract.",
+      ),
     verbosity: z.number().optional(),
     clipLModelSrc: modelSrcInputSchema.optional()
       .describe("CLIP-L text encoder model — required for SD3"),
@@ -210,7 +226,45 @@ export const diffusionRequestSchema = z.object({
     .min(1)
     .regex(BASE64_PATTERN)
     .optional()
-    .describe("Base64-encoded image for img2img generation"),
+    .describe("Base64-encoded image for img2img generation. Mutually exclusive with init_images."),
+  init_images: z.array(
+    z.string().min(1).regex(BASE64_PATTERN),
+  )
+    .min(1)
+    .optional()
+    .describe(
+      "FLUX.2-only multi-reference fusion: array of base64-encoded PNG/JPEG buffers. " +
+      "Each buffer becomes a separate reference image that the FLUX.2 transformer attends to. " +
+      "Mutually exclusive with init_image; requires the model to be loaded with " +
+      "config.prediction='flux2_flow' and a Qwen3 text encoder via llmModelSrc.",
+    ),
+  increase_ref_index: z.boolean().optional()
+    .describe(
+      "FLUX.2 fusion only. When omitted, the addon default (false) is used. When false, all " +
+      "reference latents share one RoPE index slot and blend via attention (recommended for " +
+      "FLUX.2-klein). When true, each reference gets its own RoPE index slot — use only with " +
+      "text encoders that receive per-image vision tokens.",
+    ),
+  auto_resize_ref_image: z.boolean().optional()
+    .describe(
+      "FLUX.2 only. When omitted, the addon default (true) is used. When true, every reference " +
+      "image (single or fusion) is auto-resized to the target width/height before VAE-encoding. " +
+      "Disable only if the buffers are already at the exact target dimensions.",
+    ),
+  lora: z
+    .string()
+    .min(1)
+    .regex(ABSOLUTE_PATH_PATTERN, {
+      message:
+        "lora must be an absolute path",
+    })
+    .optional()
+    .describe(
+      "Optional local LoRA adapter path to apply for this generation. " +
+      "Must be an absolute filesystem path. " +
+      "Whether the adapter persists across subsequent diffusion() calls is controlled " +
+      "by sdcppConfigSchema.lora_apply_mode (set at loadModel time).",
+    ),
   strength: z
     .number()
     .min(0)
@@ -219,7 +273,13 @@ export const diffusionRequestSchema = z.object({
     .describe(
       "img2img denoising strength (0.0 = keep source, 1.0 = ignore source); used by the SD/SDXL SDEdit path. No-op for FLUX.2, which uses in-context conditioning and ignores this field.",
     ),
-});
+}).refine(
+  (d) => d.init_image === undefined || d.init_images === undefined,
+  {
+    message:
+      "init_image and init_images are mutually exclusive — pass one or the other, not both.",
+  },
+);
 
 export type DiffusionRequest = z.input<typeof diffusionRequestSchema>;
 
@@ -231,6 +291,13 @@ export type DiffusionStreamRequest = z.input<
   typeof diffusionStreamRequestSchema
 >;
 
-export type DiffusionClientParams = Omit<DiffusionRequest, "init_image"> & {
-  init_image?: Uint8Array;
-};
+type DiffusionClientParamsBase = Omit<
+  DiffusionRequest,
+  "init_image" | "init_images"
+>; // request shape without the two image inputs, which are re-typed as raw bytes below
+// Client-facing params: image inputs are Uint8Array here; the union makes init_image and init_images mutually exclusive at compile time.
+export type DiffusionClientParams = DiffusionClientParamsBase &
+  (
+    | { init_image?: Uint8Array; init_images?: never } // single-image variant: `never` rejects init_images
+    | { init_image?: never; init_images?: Uint8Array[] } // multi-image variant: `never` rejects init_image
+  );
@@ -18,6 +18,10 @@ export async function* diffusion(
     ? Buffer.from(request.init_image, "base64")
     : undefined;
 
+  const init_images = request.init_images
+    ? request.init_images.map((b64) => Buffer.from(b64, "base64"))
+    : undefined;
+
   const response = await model.run({
     prompt: request.prompt,
     negative_prompt: request.negative_prompt,
@@ -34,7 +38,11 @@ export async function* diffusion(
     vae_tiling: request.vae_tiling,
     cache_preset: request.cache_preset,
     init_image,
+    init_images,
+    increase_ref_index: request.increase_ref_index,
+    auto_resize_ref_image: request.auto_resize_ref_image,
     strength: request.strength,
+    lora: request.lora,
   });
 
   let outputIndex = 0;