tetherto · gianni-cor · Apr 15, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
@@ -199,6 +199,18 @@ jobs:
           cp "$MODEL_PATH" ./test/model/stable-diffusion-v2-1-Q8_0.gguf
           test -f ./test/model/stable-diffusion-v2-1-Q8_0.gguf
 
+      - if: ${{ matrix.os == 'windows-11' }}
+        name: Download SD2 model for C++ tests (Windows)
+        working-directory: ${{ env.WORKDIR }}
+        shell: powershell
+        run: |
+          & .\scripts\download-model-sd2.ps1
+          $modelPath = Get-ChildItem -Path . -Filter "stable-diffusion-v2-1-Q8_0.gguf" -Recurse | Select-Object -First 1 -ExpandProperty FullName
+          if (-not $modelPath) { throw "Model file not found" }
+          New-Item -ItemType Directory -Force -Path ".\test\model" | Out-Null
+          Copy-Item -Path $modelPath -Destination ".\test\model\stable-diffusion-v2-1-Q8_0.gguf" -Force
+          if (-not (Test-Path ".\test\model\stable-diffusion-v2-1-Q8_0.gguf")) { throw "Failed to copy model file" }
+
       - if: ${{ matrix.platform == 'darwin' }}
         name: Use Apple clang for Apple platform builds
         run: |
@@ -228,8 +240,8 @@ jobs:
         env:
           SD_TEST_MODEL_PATH: ${{ github.workspace }}/${{ env.WORKDIR }}/test/model/stable-diffusion-v2-1-Q8_0.gguf
         run: |
-          if [ -f "${{ github.workspace }}/.lsan-suppressions.txt" ]; then
-            export LSAN_OPTIONS="suppressions=${{ github.workspace }}/.lsan-suppressions.txt"
+          if [ -f "${{ github.workspace }}/${{ env.WORKDIR }}/.lsan-suppressions.txt" ]; then
+            export LSAN_OPTIONS="suppressions=${{ github.workspace }}/${{ env.WORKDIR }}/.lsan-suppressions.txt"
           fi
           echo "SD_TEST_MODEL_PATH=$SD_TEST_MODEL_PATH"
           ls -lh "$SD_TEST_MODEL_PATH"

@@ -72,7 +72,7 @@ jobs:
             platform: win32
             arch: x64
             runner: ai-run-windows11-gpu
-            timeout: 600
+            timeout: 1800
 
     steps:
       - name: Setup Node.js

@@ -23,4 +23,5 @@ logs/
 output/
 temp/
 *.deb
+*.zip
 test/integration/all.js
@@ -1,3 +1,8 @@
 # Known false positive with N-API callbacks under ASan
 leak:SdModel::process
 leak:SdModel::load
+
+# D-Bus library false positives during test initialization
+leak:dbus_bus_register
+leak:dbus_pending_call_block
+leak:_dbus_message_loader_queue_messages
@@ -1,5 +1,23 @@
 # Changelog
 
+## [0.2.0] - 2026-04-15
+
+### Added
+
+- FLUX.2 img2img support with in-context conditioning (`ref_images`) via `init_image` parameter
+- JS-side input validation for `readImageDimensions()` with buffer-length guards for truncated PNG/JPEG
+- Regression tests for FLUX img2img prediction guard and truncated image handling
+
+### Changed
+
+- FLUX img2img now requires explicit `prediction: 'flux2_flow'` in config to prevent silent fallback to SDEdit
+- Updated `prediction` docstring to clarify auto-detection is insufficient for FLUX img2img
+- Exported `readImageDimensions()` for testing and external use
+
+### Fixed
+
+- `readImageDimensions()` now safely handles truncated/corrupt PNG and JPEG buffers
+
 ## [0.1.3] - 2026-04-15
 
 ### Changed

@@ -95,6 +95,7 @@ add_bare_module(qvac-lib-inference-addon-sd EXPORTS ${BACKEND_DL_EXPORTS})
     ${PROJECT_SOURCE_DIR}/addon/src/handlers/SdCtxHandlers.cpp
     ${PROJECT_SOURCE_DIR}/addon/src/handlers/SdGenHandlers.cpp
     ${PROJECT_SOURCE_DIR}/addon/src/model-interface/SdModel.cpp
+    ${PROJECT_SOURCE_DIR}/addon/src/utils/ImageUtils.cpp
     ${PROJECT_SOURCE_DIR}/addon/src/utils/LoggingMacros.cpp
     ${PROJECT_SOURCE_DIR}/addon/src/utils/BackendSelection.cpp
   )

@@ -300,6 +300,18 @@ JavaScript Dependencies
     https://github.com/mafintosh/z32
 
 
+=========================================================================
+Image Assets
+=========================================================================
+
+--- public-domain (U.S. Federal Government Work) ---
+
+  assets/von-neumann.jpg
+    John von Neumann (1956). U.S. Department of Energy, File ID: HD.3F.191.
+    This image is in the Public Domain as a work of the U.S. Federal
+    Government (17 U.S.C. § 105). No copyright restrictions apply.
+    https://commons.wikimedia.org/wiki/File:JohnvonNeumann-LosAlamos.gif
+
 =========================================================================
 C++ Dependencies
 =========================================================================

@@ -19,6 +19,7 @@ Native C++ addon for text-to-image generation using [stable-diffusion.cpp](https
   - [7. Release Resources](#7-release-resources)
 - [Model File Reference](#model-file-reference)
 - [FLUX.2 Implementation Notes](#flux2-implementation-notes)
+- [Credits](#credits)
 - [License](#license)
 
 ---
@@ -146,13 +147,16 @@ Source: [`examples/generate-image.js`](./examples/generate-image.js)
 
 > **Performance note:** On an M1 MacBook Air (16 GB) with Metal enabled, loading takes ~15 s and 20 steps at 512 × 512 take ~10 minutes. Reduce `STEPS` to 4 for quick tests — FLUX.2's distilled model is designed for low step counts.
 
-## Other Exampless
+## Other Examples
 
 -   [Quickstart](./examples/quickstart.js) – Minimal text-to-image generation with SD2.1.
 -   [Generate Image (SD2.1)](./examples/generate-image-sd2.js) – Text-to-image with an SD2.1 all-in-one GGUF model.
 -   [Generate Image (SD3)](./examples/generate-image-sd3.js) – Text-to-image with SD3 Medium (safetensors, diffusion + CLIP encoders).
 -   [Generate Image (SDXL)](./examples/generate-image-sdxl.js) – Text-to-image with an SDXL base all-in-one GGUF model.
 -   [Runtime Stats](./examples/runtime-stats-sd2.js) – Run SD2.1 inference and report runtime statistics.
+-   [img2img FLUX2](./examples/img2img-flux2.js) – Transform an image with FLUX2-klein (Q8_0, in-context conditioning).
+-   [img2img FLUX2 F16](./examples/img2img-flux2-f16.js) – Transform an image with FLUX2-klein (F16 full precision).
+-   [img2img SD3](./examples/img2img-sd3.js) – Transform an image with SD3 Medium (SDEdit, flow-matching).
 
 ---
 
@@ -198,7 +202,7 @@ const config = {
 }
 ```
 
-All config values are coerced to strings internally before being passed to the native layer.
+Config values are coerced to strings internally. Generation parameters (prompt, steps, seed, etc.) are JSON-serialized with their native types preserved.
 
 | Parameter | Type | Default | Description |
 |-----------|------|---------|-------------|
@@ -279,21 +283,61 @@ require('bare-fs').writeFileSync('output.png', images[0])
 
 > **Sampler note:** Do not set `sampling_method: 'euler_a'` for FLUX.2 models — it will produce random noise. Leave the field unset to let the library auto-select `euler` for flow-matching models.
 
-#### Image-to-image (not yet supported)
+#### Image-to-image (`init_image`)
 
-> **Note:** img2img is not yet wired in the JS layer — calling `model.run()` with `init_image` will throw. The parameters below are reserved for a future release.
+Pass `init_image` (a `Uint8Array` of PNG or JPEG bytes) to transform an existing image with a text prompt. Unless you pass both `width` and `height`, they are auto-detected from the image header and rounded up to the next multiple of 8.
+
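+The addon selects the img2img strategy from the model's prediction type. For FLUX.2, set `prediction: 'flux2_flow'` explicitly in the config — auto-detection alone is not sufficient for FLUX img2img and would silently fall back to SDEdit: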
+
+| Model family | Prediction type | Strategy | How it works |
+|-------------|----------------|----------|-------------|
+| FLUX.2 | `flux2_flow` / `flux_flow` | In-context conditioning (`ref_images`) | Input image is VAE-encoded into separate latent tokens; the transformer attends to them via joint attention with distinct RoPE positions. The target starts from pure noise, so the model preserves features while generating a fully new image. |
+| SD1.x / SD2.x / SDXL / SD3 | All others | SDEdit (`init_image`) | Input image is noised according to `strength` (0.0–1.0), then denoised with the text prompt. Lower strength preserves more of the original; higher strength allows more creative freedom. |
+
+**FLUX.2 example (in-context conditioning):**
 
 ```js
-const inputPng = require('bare-fs').readFileSync('input.png')
+const fs = require('bare-fs')
+
+const inputImage = fs.readFileSync('assets/von-neumann.jpg')
 
 const response = await model.run({
-  prompt: 'a photo of a cat in a snowy landscape',
-  init_image: inputPng,
-  strength: 0.75,  // 0.0 = no change, 1.0 = full redraw
-  steps: 20
+  prompt: 'a modern tech CEO version of this person, professional headshot',
+  init_image: inputImage,
+  cfg_scale: 1.0,
+  steps: 20,
+  guidance: 9.0,
+  seed: 42
 })
 ```
 
+**SD3 example (SDEdit):**
+
+```js
+const inputImage = fs.readFileSync('headshot.jpeg')
+
+const response = await model.run({
+  prompt: 'anime portrait, same pose, studio ghibli style, soft cel shading',
+  negative_prompt: 'photorealistic, blurry, low quality',
+  init_image: inputImage,
+  cfg_scale: 4.5,
+  steps: 30,
+  strength: 0.75,
+  sampling_method: 'euler',
+  seed: 42
+})
+```
+
+> **SDEdit img2img limitations:**
+>
+> - **Black-and-white input images** produce weaker results because the model must hallucinate all color information. Consider colorizing the image before feeding it in.
+> - **Low-resolution images** (below ~512×512) give the model less detail to preserve identity. Upscaling beforehand helps.
+> - **High `strength` values** (≥ 0.7) allow the model to deviate significantly from the input, including changing facial features, gender, or ethnicity. Use `strength` 0.35–0.55 for identity-preserving edits.
+> - **Style prompts** like "anime" or "studio ghibli" carry training-data biases that can alter the subject's appearance. Anchor the prompt with terms like "same person, same face" and use the negative prompt to block unwanted changes.
+> - **Non-multiple-of-8 images** are automatically aligned (nearest-neighbor resize to the next multiple of 8) before processing. For best quality, provide images with dimensions that are already multiples of 8.
+
+The bundled test image (`assets/von-neumann.jpg`) is a 1956 portrait of John von Neumann sourced from the U.S. Department of Energy (Public Domain). See the [Credits](#credits) section for details.
+
 ### 7. Release Resources
 
 ```js
@@ -441,6 +485,16 @@ The underlying pattern across all these fixes is the same: our C++ config struct
 
 ---
 
+## Credits
+
+### Test Image
+
+`assets/von-neumann.jpg` — **John von Neumann** (1956).
+Source: U.S. Department of Energy, File ID: HD.3F.191.
+This image is in the **Public Domain** as a work of the U.S. Federal Government.
+
+---
+
 ## License
 
 Apache-2.0 — see [LICENSE](./LICENSE) for details.
@@ -2,6 +2,55 @@
 
 const path = require('bare-path')
 
+/**
+ * Extract pixel dimensions from a PNG or JPEG buffer without a full decode.
+ *
+ * PNG: recognised by the first four signature bytes (0x89 'P' 'N' 'G'). Width and
+ * height are read as big-endian uint32 values at file offsets 16–19 and 20–23,
+ * i.e. the first eight data bytes of the IHDR chunk. The IHDR chunk type itself
+ * is not checked.
+ *
+ * JPEG: recognised by the SOI marker (0xFF 0xD8). Segments are walked using their
+ * 16-bit big-endian length until the first SOFx marker (0xC0–0xCF except 0xC4,
+ * 0xC8 and 0xCC); height is the 16-bit value at marker offset +5, width at +7.
+ * NOTE(review): 0xFF fill bytes before a marker and length-less standalone
+ * markers are not special-cased — the two bytes after any marker are always
+ * interpreted as a segment length.
+ *
+ * Returns { width, height }, or null when the buffer is missing, too short,
+ * not a recognised format, or the JPEG scan ends before a SOFx segment is found.
+ * Values are returned exactly as stored in the header; they are not validated.
+ *
+ * @param {Uint8Array} buf - Raw image file bytes.
+ * @returns {{ width: number, height: number } | null}
+ */
+function readImageDimensions (buf) {
+  if (!buf || buf.length < 4) return null
+
+  // PNG — full magic is \x89PNG\r\n\x1a\n, but only the first four bytes are tested; width/height sit at file offsets 16–23 and `>>> 0` keeps them unsigned
+  if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) {
+    if (buf.length < 24) return null
+    const w = (buf[16] << 24 | buf[17] << 16 | buf[18] << 8 | buf[19]) >>> 0
+    const h = (buf[20] << 24 | buf[21] << 16 | buf[22] << 8 | buf[23]) >>> 0
+    return { width: w, height: h }
+  }
+
+  // JPEG — starts with SOI (0xFF 0xD8); walk the segments that follow, giving up if a marker does not begin with 0xFF or a segment length is below 2
+  if (buf[0] === 0xFF && buf[1] === 0xD8) {
+    let i = 2
+    while (i + 4 < buf.length) {
+      if (buf[i] !== 0xFF) break
+      const marker = buf[i + 1]
+      const segLen = (buf[i + 2] << 8 | buf[i + 3])
+      if (segLen < 2) break
+      // Frame headers SOF0–SOF3, SOF5–SOF7, SOF9–SOF11, SOF13–SOF15 (0xC4, 0xC8 and 0xCC are skipped like any other segment)
+      if (
+        (marker >= 0xC0 && marker <= 0xC3) ||
+        (marker >= 0xC5 && marker <= 0xC7) ||
+        (marker >= 0xC9 && marker <= 0xCB) ||
+        (marker >= 0xCD && marker <= 0xCF)
+      ) {
+        if (i + 8 >= buf.length) return null
+        const h = (buf[i + 5] << 8 | buf[i + 6])
+        const w = (buf[i + 7] << 8 | buf[i + 8])
+        return { width: w, height: h }
+      }
+      i += 2 + segLen
+    }
+  }
+
+  return null
+}
+
 /**
  * JavaScript wrapper around the native stable-diffusion.cpp addon.
  * Manages the native handle lifecycle and bridges JS ↔ C++.
@@ -61,6 +110,32 @@ class SdInterface {
    * @returns {Promise<boolean>} true if job was accepted, false if busy
    */
   async runJob (params) {
+    // img2img path: hand the init_image Uint8Array to C++ as a separate
+    // initImageBuffer property (avoids JSON-encoding every byte as a number).
+    // Unless the caller supplied BOTH width and height, both are replaced with the
+    // header dimensions rounded up to a multiple of 8 (left untouched if the header is
+    // not recognised) — without this, generate_image() hits GGML_ASSERT(image.width == tensor->ne[0]).
+    if (params.init_image) {
+      const serializable = { ...params }
+      const imgBuf = serializable.init_image
+      delete serializable.init_image
+
+      if (!serializable.width || !serializable.height) {
+        const dims = readImageDimensions(imgBuf)
+        if (dims) {
+          serializable.width = Math.ceil(dims.width / 8) * 8
+          serializable.height = Math.ceil(dims.height / 8) * 8
+        }
+      }
+
+      const paramsJson = JSON.stringify(serializable)
+      return this._binding.runJob(this._handle, {
+        type: 'text',
+        input: paramsJson,
+        initImageBuffer: imgBuf
+      })
+    }
+
     const paramsJson = JSON.stringify(params)
     return this._binding.runJob(this._handle, { type: 'text', input: paramsJson })
   }
@@ -76,4 +151,4 @@ class SdInterface {
   }
 }
 
-module.exports = { SdInterface }
+module.exports = { SdInterface, readImageDimensions }
@@ -86,6 +86,14 @@ inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
   SdModel::GenerationJob job;
   job.paramsJson = paramsJson;
 
+  auto inputObj = args.getJsObject(1, "inputObj");
+  auto initBuf =
+      inputObj
+          .getOptionalPropertyAs<js::TypedArray<uint8_t>, std::vector<uint8_t>>(
+              env, "initImageBuffer");
+  if (initBuf.has_value())
+    job.initImageBytes = std::move(initBuf.value());
+
   // Progress updates are queued as JSON strings (JsStringOutputHandler).
   job.progressCallback = [&instance](const std::string& progressJson) {
     instance.addonCpp->outputQueue->queueResult(std::any(progressJson));
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,4 +23,5 @@ logs/ @@
     output/
     temp/
     *.deb
+    *.zip
     test/integration/all.js