diff --git a/.github/workflows/integration-test-lib-infer-diffusion.yml b/.github/workflows/integration-test-lib-infer-diffusion.yml
index 11fd300314..5dbb7a1e01 100644
--- a/.github/workflows/integration-test-lib-infer-diffusion.yml
+++ b/.github/workflows/integration-test-lib-infer-diffusion.yml
@@ -23,7 +23,7 @@ on:
 
 jobs:
   run-integration-tests:
-    timeout-minutes: 360
+    timeout-minutes: ${{ matrix.timeout || 360 }}
     continue-on-error: true
     runs-on: ${{ matrix.runner }}
     name: test-${{ matrix.platform }}-${{ matrix.arch }}
@@ -72,6 +72,7 @@ jobs:
             platform: win32
             arch: x64
             runner: ai-run-windows11-gpu
+            timeout: 600
 
     steps:
       - name: Setup Node.js
@@ -238,6 +239,33 @@ jobs:
         shell: bash
         run: sudo apt-get update && sudo apt-get install -y mesa-vulkan-drivers
 
+      - name: macOS — Metal GPU diagnostic
+        if: matrix.platform == 'darwin'  # informational probe: the system_profiler and clang commands fall back to an echo instead of failing the step
+        shell: bash  # NOTE(review): the "simdgroup" row below repeats the Apple7 family query — confirm that is intended
+        run: |
+          echo "=== GPU Hardware ==="
+          system_profiler SPDisplaysDataType 2>/dev/null || echo "(system_profiler unavailable)"
+          echo ""
+          echo "=== Metal Device Check ==="
+          cat > /tmp/metal_check.m << 'OBJC'
+          #import <Metal/Metal.h>
+          #import <stdio.h>
+          int main() {
+            id<MTLDevice> dev = MTLCreateSystemDefaultDevice();
+            if (!dev) { printf("No Metal device available\n"); return 1; }
+            printf("Device    : %s\n", [[dev name] UTF8String]);
+            printf("Headless  : %s\n", dev.headless ? "yes" : "no");
+            printf("LowPower  : %s\n", dev.lowPower ? "yes" : "no");
+            printf("Apple4    : %d\n", [dev supportsFamily:MTLGPUFamilyApple4]);
+            printf("Apple7    : %d\n", [dev supportsFamily:MTLGPUFamilyApple7]);
+            printf("Metal3    : %d\n", [dev supportsFamily:MTLGPUFamilyMetal3]);
+            printf("simdgroup : %d\n", [dev supportsFamily:MTLGPUFamilyApple7]);
+            return 0;
+          }
+          OBJC
+          clang -x objective-c -framework Metal -framework Foundation \
+                -o /tmp/metal_check /tmp/metal_check.m 2>/dev/null && /tmp/metal_check || echo "(Metal check failed to compile/run)"
+
       - name: Run integration test (Linux/macOS)
         if: ${{ matrix.platform != 'win32' }}
         working-directory: ${{ env.WORKDIR }}
diff --git a/.github/workflows/prebuilds-lib-infer-diffusion.yml b/.github/workflows/prebuilds-lib-infer-diffusion.yml
index a39b90eb2c..f917e9e6f6 100644
--- a/.github/workflows/prebuilds-lib-infer-diffusion.yml
+++ b/.github/workflows/prebuilds-lib-infer-diffusion.yml
@@ -403,7 +403,7 @@ jobs:
         working-directory: ${{ env.WORKDIR }}
         run: |
           if [[ "${{ matrix.platform }}" == "android" ]]; then
-            find prebuilds -name "*.bare" -exec $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip {} \;
+            find prebuilds \( -name "*.bare" -o -name "*.so" \) -exec $ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip {} \;
           elif [[ "${{ matrix.platform }}" == "darwin" || "${{ matrix.platform }}" == "ios" ]]; then
             find prebuilds -name "*.bare" -exec strip -S {} \;
           else
diff --git a/packages/lib-infer-diffusion/.gitignore b/packages/lib-infer-diffusion/.gitignore
new file mode 100644
index 0000000000..8b30aa95bd
--- /dev/null
+++ b/packages/lib-infer-diffusion/.gitignore
@@ -0,0 +1,26 @@
+.vs/
+build/
+dist/
+models/
+store/
+node_modules/
+prebuilds/
+
+.npmrc
+package-lock.json
+.cache/
+.idea/
+**/store/
+.DS_Store
+logs/
+*.gguf
+*.safetensors
+*.ckpt
+*.log
+.clang-tidy
+# Added by qvac-lint-cpp
+.clang-format
+output/
+temp/
+*.deb
+test/integration/all.js
diff --git a/packages/lib-infer-diffusion/.lsan-suppressions.txt b/packages/lib-infer-diffusion/.lsan-suppressions.txt
new file mode 100644
index 0000000000..f47017482d
--- /dev/null
+++ b/packages/lib-infer-diffusion/.lsan-suppressions.txt
@@ -0,0 +1,3 @@
+# Known false positive with N-API callbacks under ASan
+leak:SdModel::process
+leak:SdModel::load
diff --git a/packages/lib-infer-diffusion/CHANGELOG.md b/packages/lib-infer-diffusion/CHANGELOG.md
new file mode 100644
index 0000000000..711d842ea4
--- /dev/null
+++ b/packages/lib-infer-diffusion/CHANGELOG.md
@@ -0,0 +1,24 @@
+# Changelog
+
+## [0.1.0] - 2026-03-19
+
+### Added
+
+#### Stable Diffusion inference addon
+
+Initial release of the `@qvac/diffusion-cpp` native addon for image generation, supporting SD1.x, SD2.x, SDXL, SD3, and FLUX model families.
+
+#### GPU acceleration
+
+- Metal backend on macOS and iOS
+- Vulkan backend on Windows, Linux, and Android
+- OpenCL backend on Android devices with Adreno GPU
+- CPU fallback on all platforms
+
+#### Android dynamic backend loading
+
+Dynamic ggml backend loading (`GGML_BACKEND_DL`) on Android with `libqvac-diffusion-ggml-*` naming to avoid symbol conflicts with system-installed ggml libraries. CPU backends remain statically linked (`GGML_CPU_STATIC`) while GPU backends are loaded at runtime.
+
+#### vcpkg-based build system
+
+vcpkg overlay ports for `ggml` and `stable-diffusion-cpp` with clang override triplets for Linux and PIC static linking. Custom patches for runtime backend selection, abort callbacks, failure-path cleanup, and Android Vulkan diagnostics.
diff --git a/packages/lib-infer-diffusion/CMakeLists.txt b/packages/lib-infer-diffusion/CMakeLists.txt
new file mode 100644
index 0000000000..727c98cc89
--- /dev/null
+++ b/packages/lib-infer-diffusion/CMakeLists.txt
@@ -0,0 +1,145 @@
+cmake_minimum_required(VERSION 3.25)
+
+set(ANDROID_STL "c++_shared" CACHE STRING "Android STL linkage")  # string cache entry: option() is meant for ON/OFF values only
+option(BUILD_TESTING "Build tests" OFF)
+if(BUILD_TESTING)
+  list(APPEND VCPKG_MANIFEST_FEATURES "tests")
+endif()
+
+option(SD_CUDA "Enable CUDA GPU backend" OFF)
+if(SD_CUDA)
+  list(APPEND VCPKG_MANIFEST_FEATURES "cuda")
+endif()
+
+option(SD_OPENCL "Enable OpenCL GPU backend (Android/Adreno)" OFF)
+if(SD_OPENCL)
+  list(APPEND VCPKG_MANIFEST_FEATURES "opencl")
+endif()
+
+# Vulkan and Metal are auto-enabled via default-features in the ggml and
+# stable-diffusion-cpp overlay ports.  CUDA requires a specific build machine
+# so it remains opt-in via SD_CUDA; OpenCL is likewise opt-in via SD_OPENCL.
+#
+# GGML_BACKEND_DL is ON only on Android (set in the ggml portfile), mirroring
+# qvac-fabric. On Linux/macOS/Windows GPU backends are statically linked.
+
+find_package(cmake-bare REQUIRED PATHS node_modules/cmake-bare)
+find_package(cmake-vcpkg REQUIRED PATHS node_modules/cmake-vcpkg)
+
+set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/triplets;${VCPKG_OVERLAY_TRIPLETS}")
+
+project(qvac-lib-inference-addon-sd C CXX)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  add_compile_options(-stdlib=libc++)
+  add_link_options(-stdlib=libc++ -static-libstdc++)
+endif()
+
+find_path(VCPKG_INSTALLED_PATH share/qvac-lint-cpp/.clang-format REQUIRED)
+configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-format
+               ${CMAKE_CURRENT_SOURCE_DIR}/.clang-format COPYONLY)
+configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-tidy
+               ${CMAKE_CURRENT_SOURCE_DIR}/.clang-tidy COPYONLY)
+
+find_path(PICOJSON_INCLUDE_DIRS "picojson/picojson.h")
+find_path(QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS "qvac-lib-inference-addon-cpp/JsInterface.hpp")
+
+# stable-diffusion.cpp – uses the CMake config installed by the overlay port
+find_package(stable-diffusion-cpp CONFIG REQUIRED)
+
+# ggml is installed alongside stable-diffusion.cpp as separate static libs.
+# stable-diffusion.a references ggml symbols externally, so we must link them.
+find_package(ggml CONFIG REQUIRED)
+
+find_path(STB_IMAGE_WRITE_INCLUDE_DIR "stb_image_write.h" REQUIRED)
+
+if(WIN32)
+  add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -DNOGDI)
+endif()
+
+bare_target(bare_target_value)
+bare_module_target("." unused_target NAME module_name VERSION unused_version)
+set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}")
+message("Building qvac-lib-inference-addon-sd with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'")
+
+# On Android with GGML_BACKEND_DL, install GPU backend .so modules alongside
+# the addon so ggml can dlopen them at runtime.  CPU backends are statically
+# linked (GGML_CPU_STATIC) and excluded from this loop.
+set(BACKEND_DL_EXPORTS "")  # stays empty unless the Android loop below runs
+if(ANDROID AND GGML_BACKEND_DL)
+  foreach(_backend ${GGML_AVAILABLE_BACKENDS})
+    if("${_backend}" MATCHES "^ggml-cpu")
+      continue()  # skip every backend whose name starts with ggml-cpu
+    endif()
+    # The lookup tries the prefixed module name first (e.g.
+    # libqvac-diffusion-ggml-vulkan.so), then the plain backend name. These
+    # modules are installed under vcpkg's bin/ directory on Android.
+    find_library(_${_backend}_LIB NAMES
+      "qvac-diffusion-${_backend}"
+      "${_backend}"
+      PATHS
+        "${VCPKG_INSTALLED_PATH}/bin"
+        "${VCPKG_INSTALLED_PATH}/lib"
+      REQUIRED)  # configuration stops if neither name is found
+    add_library(ggml::${_backend} SHARED IMPORTED)
+    set_target_properties(ggml::${_backend} PROPERTIES
+      IMPORTED_LOCATION "${_${_backend}_LIB}"
+      IMPORTED_NO_SONAME TRUE)
+    list(APPEND BACKEND_DL_EXPORTS INSTALL TARGET ggml::${_backend})  # passed to add_bare_module() later in this file
+  endforeach()
+endif()
+
+# Declare the Bare addon; its target name is read back via ${qvac-lib-inference-addon-sd}.
+add_bare_module(qvac-lib-inference-addon-sd EXPORTS ${BACKEND_DL_EXPORTS})
+
+# Translation units compiled into the addon.
+set(ADDON_SOURCES
+  ${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/handlers/SdCtxHandlers.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/handlers/SdGenHandlers.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/model-interface/SdModel.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/utils/LoggingMacros.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/utils/BackendSelection.cpp
+)
+target_sources(${qvac-lib-inference-addon-sd} PRIVATE ${ADDON_SOURCES})
+
+target_include_directories(
+  ${qvac-lib-inference-addon-sd}
+  PRIVATE
+    ${PICOJSON_INCLUDE_DIRS}
+    ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+    ${STB_IMAGE_WRITE_INCLUDE_DIR}
+    ${PROJECT_SOURCE_DIR}/addon/src
+)
+
+target_link_libraries(${qvac-lib-inference-addon-sd} PRIVATE stable-diffusion::stable-diffusion)
+
+target_compile_features(${qvac-lib-inference-addon-sd} PRIVATE cxx_std_20)
+target_compile_definitions(
+  ${qvac-lib-inference-addon-sd}
+  PUBLIC
+    JS_LOGGER
+  PRIVATE
+    BACKENDS_SUBDIR="${BACKENDS_SUBDIR_VALUE}"
+)
+
+# Expose GGML_BACKEND_DL to the addon sources when that CMake variable is set.
+if(GGML_BACKEND_DL)
+  target_compile_definitions(${qvac-lib-inference-addon-sd} PRIVATE GGML_BACKEND_DL)
+endif()
+
+if(BUILD_TESTING)
+  find_package(GTest CONFIG REQUIRED)
+  include(GoogleTest)
+  enable_testing()
+  add_subdirectory(test/unit)
+endif()
+
+if(WIN32)
+  target_link_libraries(
+    ${qvac-lib-inference-addon-sd}
+    PRIVATE
+      msvcrt.lib
+  )
+endif()
+
diff --git a/packages/lib-infer-diffusion/LICENSE b/packages/lib-infer-diffusion/LICENSE
new file mode 100644
index 0000000000..7d199ae333
--- /dev/null
+++ b/packages/lib-infer-diffusion/LICENSE
@@ -0,0 +1,179 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+Copyright 2026 Tether Data, S.A. de C.V.
diff --git a/packages/lib-infer-diffusion/NOTICE b/packages/lib-infer-diffusion/NOTICE
new file mode 100644
index 0000000000..db65ecc94a
--- /dev/null
+++ b/packages/lib-infer-diffusion/NOTICE
@@ -0,0 +1,270 @@
+@qvac/diffusion-cpp
+Copyright 2026 Tether Data, S.A. de C.V.
+
+This product includes third-party components under their
+respective licenses. @qvac/diffusion-cpp itself is licensed under
+Apache-2.0; bundled dependencies are governed by the licenses
+listed below.
+
+=========================================================================
+JavaScript Dependencies
+=========================================================================
+
+--- apache-2.0 (Apache License 2.0) ---
+
+  @hyperswarm/secret-stream@6.9.1
+    https://github.com/holepunchto/hyperswarm-secret-stream
+  @qvac/dl-base@0.1.1
+  @qvac/dl-hyperdrive@0.1.1
+  @qvac/error@0.1.1
+  @qvac/infer-base@0.1.1
+  @qvac/infer-base@0.2.2
+  @qvac/logging@0.1.0
+  @qvac/response@0.1.2
+  adaptive-timeout@1.0.1
+    https://github.com/holepunchto/adaptive-timeout
+  b4a@1.8.0
+    https://github.com/holepunchto/b4a
+  bare-abort@2.0.13
+    https://github.com/holepunchto/bare-abort
+  bare-addon-resolve@1.10.0
+    https://github.com/holepunchto/bare-addon-resolve
+  bare-ansi-escapes@2.2.3
+    https://github.com/holepunchto/bare-ansi-escapes
+  bare-assert@1.2.0
+    https://github.com/holepunchto/bare-assert
+  bare-buffer@3.6.0
+    https://github.com/holepunchto/bare-buffer
+  bare-env@3.0.0
+    https://github.com/holepunchto/bare-env
+  bare-events@2.4.2
+    https://github.com/holepunchto/bare-events
+  bare-events@2.8.2
+    https://github.com/holepunchto/bare-events
+  bare-fs@4.5.6
+    https://github.com/holepunchto/bare-fs
+  bare-hrtime@2.1.1
+    https://github.com/holepunchto/bare-hrtime
+  bare-inspect@3.1.4
+    https://github.com/holepunchto/bare-inspect
+  bare-module-resolve@1.12.1
+    https://github.com/holepunchto/bare-module-resolve
+  bare-os@3.8.0
+    https://github.com/holepunchto/bare-os
+  bare-path@3.0.0
+    https://github.com/holepunchto/bare-path
+  bare-pipe@4.1.5
+    https://github.com/holepunchto/bare-pipe
+  bare-process@4.3.0
+    https://github.com/holepunchto/bare-process
+  bare-semver@1.0.2
+    https://github.com/holepunchto/bare-semver
+  bare-signals@4.2.0
+    https://github.com/holepunchto/bare-signals
+  bare-stream@2.10.0
+    https://github.com/holepunchto/bare-stream
+  bare-tty@5.0.3
+    https://github.com/holepunchto/bare-tty
+  bare-type@1.1.0
+    https://github.com/holepunchto/bare-type
+  bare-url@2.4.0
+    https://github.com/holepunchto/bare-url
+  blind-relay@1.4.0
+    https://github.com/holepunchto/blind-relay
+  compact-encoding@2.19.0
+    https://github.com/compact-encoding/compact-encoding
+  device-file@2.3.1
+    https://github.com/holepunchto/device-file
+  events-universal@1.0.1
+    https://github.com/holepunchto/events-universal
+  fd-lock@2.1.1
+    https://github.com/holepunchto/fd-lock
+  fs-native-extensions@1.4.5
+    https://github.com/holepunchto/fs-native-extensions
+  hyperblobs@2.9.0
+    https://github.com/holepunchto/hyperblobs
+  hypercore-errors@1.5.0
+    https://github.com/holepunchto/hypercore-errors
+  hypercore-id-encoding@1.3.0
+    https://github.com/holepunchto/hypercore-id-encoding
+  hypercore-storage@2.7.1
+    https://github.com/holepunchto/hypercore-storage
+  hyperdrive@13.3.0
+    https://github.com/holepunchto/hyperdrive
+  hyperschema@1.20.1
+    https://github.com/holepunchto/hyperschema
+  index-encoder@3.5.0
+    https://github.com/holepunchto/index-encoder
+  mirror-drive@1.13.0
+    https://github.com/holepunchto/mirror-drive
+  noise-handshake@4.2.0
+    https://github.com/holepunchto/noise-handshake
+  quickbit-native@2.4.8
+    https://github.com/holepunchto/quickbit-native
+  rabin-native@2.0.0
+    https://github.com/holepunchto/rabin-native
+  rabin-stream@2.0.0
+    https://github.com/holepunchto/rabin-stream
+  rache@1.0.0
+    https://github.com/holepunchto/rache
+  refcounter@1.0.0
+    https://github.com/holepunchto/refcounter
+  require-addon@1.2.0
+    https://github.com/holepunchto/require-addon
+  resource-on-exit@1.0.0
+    https://github.com/holepunchto/bare-teardown
+  rocksdb-native@3.14.0
+    https://github.com/holepunchto/rocksdb-native
+  scope-lock@1.2.4
+    https://github.com/holepunchto/scope-lock
+  simdle-native@1.3.9
+    https://github.com/holepunchto/simdle-native
+  sub-encoder@2.1.3
+    https://github.com/holepunchto/sub-encoder
+  text-decoder@1.2.7
+    https://github.com/holepunchto/text-decoder
+  udx-native@1.19.2
+    https://github.com/holepunchto/udx-native
+  unslab@1.3.0
+    https://github.com/holepunchto/unslab
+  which-runtime@1.3.2
+    https://github.com/holepunchto/which-runtime
+
+--- isc (ISC License) ---
+
+  bits-to-bytes@1.3.0
+    https://github.com/holepunchto/bits-to-bytes
+  compact-encoding-bitfield@1.0.0
+    https://github.com/compact-encoding/compact-encoding-bitfield
+  compact-encoding-net@1.2.0
+    https://github.com/compact-encoding/compact-encoding-net
+  nanoassert@2.0.0
+    https://github.com/emilbayes/nanoassert
+  noise-curve-ed@2.1.0
+    https://github.com/chm-diederichs/noise-curve-ed
+  quickbit-universal@2.2.0
+    https://github.com/holepunchto/quickbit-universal
+  simdle-universal@1.1.2
+    https://github.com/holepunchto/simdle-universal
+
+--- mit (MIT License) ---
+
+  big-sparse-array@1.0.3
+    https://github.com/mafintosh/big-sparse-array
+  binary-stream-equals@1.0.0
+    https://github.com/mafintosh/binary-stream-equals
+  bogon@1.2.0
+    https://github.com/mafintosh/bogon
+  codecs@3.1.0
+    https://github.com/mafintosh/codecs
+  corestore@7.9.1
+    https://github.com/holepunchto/corestore
+  debounceify@1.1.0
+    https://github.com/mafintosh/debounceify
+  dht-rpc@6.26.3
+    https://github.com/mafintosh/dht-rpc
+  fast-fifo@1.3.2
+    https://github.com/mafintosh/fast-fifo
+  flat-tree@1.13.0
+    https://github.com/mafintosh/flat-tree
+  generate-object-property@2.0.0
+    https://github.com/mafintosh/generate-object-property
+  generate-string@1.0.1
+    https://github.com/mafintosh/generate-string
+  hyperbee@2.27.3
+    https://github.com/holepunchto/hyperbee
+  hypercore@11.27.14
+    https://github.com/holepunchto/hypercore
+  hypercore-crypto@3.6.1
+    https://github.com/mafintosh/hypercore-crypto
+  hyperdht@6.29.3
+    https://github.com/holepunchto/hyperdht
+  hyperswarm@4.17.0
+    https://github.com/holepunchto/hyperswarm
+  is-options@1.0.2
+    https://github.com/mafintosh/is-options
+  is-property@1.0.2
+    https://github.com/mikolalysenko/is-property
+  kademlia-routing-table@1.0.6
+    https://github.com/mafintosh/kademlia-routing-table
+  mutexify@1.4.0
+    https://github.com/mafintosh/mutexify
+  nat-sampler@1.0.1
+    https://github.com/mafintosh/nat-sampler
+  protocol-buffers-encodings@1.2.0
+    https://github.com/mafintosh/protocol-buffers-encodings
+  protomux@3.10.1
+    https://github.com/mafintosh/protomux
+  queue-tick@1.0.1
+    https://github.com/mafintosh/queue-tick
+  random-array-iterator@1.0.0
+    https://github.com/mafintosh/random-array-iterator
+  ready-resource@1.2.0
+    https://github.com/holepunchto/ready-resource
+  record-cache@1.2.0
+    https://github.com/mafintosh/record-cache
+  resolve-reject-promise@1.1.0
+    https://github.com/mafintosh/resolve-reject-promise
+  safety-catch@1.0.3
+    https://github.com/mafintosh/safety-catch
+  same-data@1.0.0
+    https://github.com/mafintosh/same-data
+  shuffled-priority-queue@2.1.0
+    https://github.com/mafintosh/shuffled-priority-queue
+  signal-promise@1.0.3
+    https://github.com/mafintosh/signal-promise
+  signed-varint@2.0.1
+    https://github.com/dominictarr/signed-varint
+  sodium-native@5.1.0
+    https://github.com/holepunchto/sodium-native
+  sodium-secretstream@1.2.0
+    https://github.com/mafintosh/sodium-secretstream
+  sodium-universal@5.0.1
+    https://github.com/holepunchto/sodium-universal
+  speedometer@1.1.0
+    https://github.com/mafintosh/speedometer
+  streamx@2.25.0
+    https://github.com/mafintosh/streamx
+  teex@1.0.1
+    https://github.com/mafintosh/teex
+  test-tmp@1.4.0
+    https://github.com/mafintosh/test-tmp
+  time-ordered-set@2.0.1
+    https://github.com/mafintosh/time-ordered-set
+  timeout-refresh@2.0.1
+    https://github.com/mafintosh/timeout-refresh
+  unix-path-resolve@1.0.2
+    https://github.com/mafintosh/unix-path-resolve
+  unordered-set@2.0.1
+    https://github.com/mafintosh/unordered-set
+  varint@5.0.0
+    https://github.com/chrisdickinson/varint
+  xache@1.2.1
+    https://github.com/mafintosh/xache
+  z32@1.1.0
+    https://github.com/mafintosh/z32
+
+
+=========================================================================
+C++ Dependencies
+=========================================================================
+
+--- apache-2.0-with-llvm-exception ---
+
+  libc++ (LLVM C++ Standard Library)
+    https://github.com/llvm/llvm-project
+
+--- bsd-2-clause (BSD 2-Clause License) ---
+
+  picojson
+    https://github.com/kazuho/picojson
+
+--- mit (MIT License) ---
+
+  ggml
+    https://github.com/ggml-org/ggml
+  stable-diffusion-cpp
+    https://github.com/tetherto/qvac-ext-stable-diffusion.cpp
+  stb
+    https://github.com/nothings/stb
+
diff --git a/packages/lib-infer-diffusion/README.md b/packages/lib-infer-diffusion/README.md
new file mode 100644
index 0000000000..e77e3c21d0
--- /dev/null
+++ b/packages/lib-infer-diffusion/README.md
@@ -0,0 +1,448 @@
+# qvac-lib-infer-stable-diffusion-cpp
+
+Native C++ addon for text-to-image and image-to-image generation using [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp), built for the Bare Runtime. Supports **Stable Diffusion 1.x / 2.x / XL / 3** and **FLUX.2 [klein]**.
+
+> **Scope:** Video generation (Wan2.x) is not yet supported.
+
+## Table of Contents
+
+- [Supported platforms](#supported-platforms)
+- [Building from Source](#building-from-source)
+- [Downloading Model Files](#downloading-model-files)
+- [Running the Example](#running-the-example)
+- [Other Examples](#other-examples)
+- [Usage](#usage)
+  - [1. Import the Model Class](#1-import-the-model-class)
+  - [2. Create the `args` object](#2-create-the-args-object)
+  - [3. Create the `config` object](#3-create-the-config-object)
+  - [4. Create a Model Instance](#4-create-a-model-instance)
+  - [5. Load the Model](#5-load-the-model)
+  - [6. Run Inference](#6-run-inference)
+  - [7. Release Resources](#7-release-resources)
+- [Model File Reference](#model-file-reference)
+- [FLUX.2 Implementation Notes](#flux2-implementation-notes)
+- [License](#license)
+
+---
+
+## Supported platforms
+
+| Platform | Architecture | Status | GPU Backend |
+|----------|-------------|--------|-------------|
+| macOS | arm64 | ✅ Tier 1 | Metal |
+| macOS | x64 | ✅ Tier 1 | Metal |
+| Linux | arm64, x64 | ✅ Tier 1 | Vulkan |
+| Android | arm64 | ✅ Tier 1 | Vulkan, OpenCL |
+| iOS | arm64 | ✅ Tier 1 | Metal |
+| Windows | x64 | ✅ Tier 1 | Vulkan |
+
+**Dependencies:**
+- `stable-diffusion.cpp` (bundled via vcpkg overlay port)
+- `ggml` (bundled alongside stable-diffusion.cpp)
+- Bare Runtime ≥ 1.24.0
+- CMake ≥ 3.25 and a C++20-capable compiler
+
+---
+
+## Building from Source
+
+See [build.md](./build.md) for prerequisites, platform-specific setup, cross-compilation, and troubleshooting.
+
+Quick start:
+
+```bash
+npm install -g bare bare-make
+npm install
+npm run build
+```
+
+---
+
+## Downloading Model Files
+
+A download script is provided that fetches all required files for **FLUX.2 [klein] 4B**:
+
+```bash
+./scripts/download-model.sh
+```
+
+This downloads three files into the `models/` directory:
+
+| File | Size | Description |
+|------|------|-------------|
+| `flux-2-klein-4b-Q8_0.gguf` | ~4.0 GB | FLUX.2 [klein] 4B diffusion model (Q8_0 quantised) |
+| `Qwen3-4B-Q6_K.gguf` | ~3.1 GB | Qwen3 4B text encoder (Q6_K quantised) |
+| `flux2-vae.safetensors` | ~321 MB | VAE decoder |
+
+> **Note:** Downloads can be resumed if interrupted — the script uses `curl -C -` for resumable transfers.
+
+### Why these specific files?
+
+FLUX.2 [klein] uses a split model layout. Three separate components are required:
+
+- **Diffusion model** (`flux-2-klein-4b-Q8_0.gguf`) — the main image transformer. This GGUF has no SD metadata KV pairs so it must be loaded via `diffusion_model_path` internally, not `model_path`.
+- **Text encoder** (`Qwen3-4B-Q6_K.gguf`) — Qwen3 4B in standard GGML Q6_K format. The FP4 safetensors variant from ComfyUI (`qwen_3_4b_fp4_flux2.safetensors`) is **not supported** by ggml and will fail with a tensor shape error.
+- **VAE** (`flux2-vae.safetensors`) — standard safetensors format, compatible as-is.
+
+### Disk and RAM requirements
+
+| Component | Disk | RAM at runtime |
+|-----------|------|----------------|
+| Diffusion model (Q8_0) | 4.0 GB | ~4.1 GB |
+| Text encoder (Q6_K) | 3.1 GB | ~4.3 GB |
+| VAE | 321 MB | ~95 MB |
+| **Total** | **~7.4 GB** | **~8.5 GB** |
+
+A machine with **16 GB of unified memory** (e.g. MacBook Air M-series) can run this model.
+
+---
+
+## Running the Example
+
+Two runnable examples are provided.
+
+### Load / unload only
+
+Verifies the model loads and releases cleanly without running inference:
+
+```bash
+npm run example
+```
+
+Expected output:
+
+```
+FLUX.2 [klein] 4B — load/unload example
+========================================
+Model loaded in 12.0s
+Model is ready. (No inference in this example.)
+Done — all resources released.
+```
+
+Source: [`examples/load-model.js`](./examples/load-model.js)
+
+### Text-to-image generation
+
+Generates a 512 × 512 PNG with a 20-step FLUX.2 run, saves it to `output/`:
+
+```bash
+npm run generate
+```
+
+Expected output:
+
+```
+FLUX.2 [klein] 4B — text-to-image inference
+============================================
+Loaded in 15.2s
+
+Starting generation...
+  [████████████████████] 20/20 steps
+
+Generated in 610.0s
+Got 1 image(s)
+Saved → .../output/output_seed42_0.png
+```
+
+Source: [`examples/generate-image.js`](./examples/generate-image.js)
+
+> **Performance note:** On an M1 MacBook Air (16 GB) with Metal enabled, loading takes ~15 s and 20 steps at 512 × 512 take ~10 minutes. Reduce `STEPS` to 4 for quick tests — FLUX.2's distilled model is designed for low step counts.
+
+## Other Examples
+
+-   [Quickstart](./examples/quickstart.js) – Minimal text-to-image generation with SD2.1.
+-   [Generate Image (SD2.1)](./examples/generate-image-sd2.js) – Text-to-image with an SD2.1 all-in-one GGUF model.
+-   [Generate Image (SD3)](./examples/generate-image-sd3.js) – Text-to-image with SD3 Medium (safetensors, diffusion + CLIP encoders).
+-   [Generate Image (SDXL)](./examples/generate-image-sdxl.js) – Text-to-image with an SDXL base all-in-one GGUF model.
+-   [Runtime Stats](./examples/runtime-stats-sd2.js) – Run SD2.1 inference and report runtime statistics.
+
+---
+
+## Usage
+
+### 1. Import the Model Class
+
+```js
+const ImgStableDiffusion = require('@qvac/img-stable-diffusion-cpp')
+```
+
+### 2. Create the `args` object
+
+```js
+const path = require('bare-path')
+
+const MODELS_DIR = path.resolve(__dirname, './models')
+const args = {
+  logger: console,
+  diskPath: MODELS_DIR,
+  modelName:  'flux-2-klein-4b-Q8_0.gguf',
+  llmModel:   'Qwen3-4B-Q6_K.gguf',   // Qwen3 text encoder for FLUX.2 [klein]
+  vaeModel:   'flux2-vae.safetensors'
+}
+```
+
+| Property | Required | Description |
+|----------|----------|-------------|
+| `diskPath` | ✅ | Local directory where model files are already stored |
+| `modelName` | ✅ | Diffusion model file name (all-in-one for SD1.x/2.x; diffusion-only GGUF for FLUX.2) |
+| `logger` | — | Logger instance (e.g. `console`) |
+| `clipLModel` | — | Separate CLIP-L text encoder (FLUX.1 / SD3) |
+| `clipGModel` | — | Separate CLIP-G text encoder (SDXL / SD3) |
+| `t5XxlModel` | — | Separate T5-XXL text encoder (FLUX.1 / SD3) |
+| `llmModel` | — | Qwen3 LLM text encoder (FLUX.2 [klein]) |
+| `vaeModel` | — | Separate VAE file |
+
+### 3. Create the `config` object
+
+```js
+const config = {
+  threads: 8  // CPU threads for tensor operations (Metal handles GPU automatically)
+}
+```
+
+All config values are coerced to strings internally before being passed to the native layer.
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `threads` | number | auto | Number of CPU threads for model loading and CPU ops |
+| `type` | `'auto'` \| `'f32'` \| `'f16'` \| `'q4_0'` \| `'q8_0'` \| … | auto | Override weight quantisation type |
+| `rng` | `'cpu'` \| `'cuda'` \| `'std_default'` | `'cuda'` | RNG backend (`'cuda'` = philox RNG — not GPU-specific despite the name; recommended) |
+| `clip_on_cpu` | `true` \| `false` | `false` | Force CLIP encoder to run on CPU |
+| `vae_on_cpu` | `true` \| `false` | `false` | Force VAE to run on CPU |
+| `flash_attn` | `true` \| `false` | `false` | Enable flash attention (reduces memory) |
+
+### 4. Create a Model Instance
+
+```js
+const model = new ImgStableDiffusion(args, config)
+```
+
+The constructor stores configuration only — no memory is allocated yet.
+
+### 5. Load the Model
+
+```js
+await model.load()
+```
+
+This creates the native `sd_ctx_t` and loads all weights into memory. It can take 10–30 seconds depending on disk speed and model size. All model files must already be present on disk at `diskPath`.
+
+### 6. Run Inference
+
+#### Text-to-image (`model.run`)
+
+The primary API. Returns a `QvacResponse` that streams step-progress ticks and the final PNG:
+
+```js
+const images = []
+
+const response = await model.run({
+  prompt: 'a majestic red fox in a snowy forest, golden light, photorealistic',
+  steps: 20,
+  width: 512,
+  height: 512,
+  guidance: 3.5,   // distilled guidance scale — FLUX.2 specific
+  seed: 42
+})
+
+await response
+  .onUpdate(data => {
+    if (data instanceof Uint8Array) {
+      images.push(data)  // PNG-encoded output image
+    } else if (typeof data === 'string') {
+      try {
+        const tick = JSON.parse(data)
+        if ('step' in tick) process.stdout.write(`\rStep ${tick.step}/${tick.total}`)
+      } catch (_) {}
+    }
+  })
+  .await()
+
+require('bare-fs').writeFileSync('output.png', images[0])
+```
+
+**Generation parameters:**
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `prompt` | string | — | Text prompt |
+| `negative_prompt` | string | `''` | Things to avoid in the output |
+| `width` | number | `512` | Output width in pixels (multiple of 8) |
+| `height` | number | `512` | Output height in pixels (multiple of 8) |
+| `steps` | number | `20` | Number of diffusion steps |
+| `guidance` | number | `3.5` | Distilled guidance scale (FLUX.2) |
+| `cfg_scale` | number | `7.0` | Classifier-free guidance scale (SD1.x / SD2.x) |
+| `sampling_method` | string | auto | Sampler name; auto-selects `euler` for FLUX.2, `euler_a` for SD1.x |
+| `scheduler` | string | auto | Scheduler; auto-selected per model family |
+| `seed` | number | `-1` | Random seed (-1 for random) |
+| `batch_count` | number | `1` | Number of images to generate |
+| `vae_tiling` | boolean | `false` | Enable VAE tiling (required for large images on 16 GB) |
+| `cache_preset` | string | — | Step-caching preset: `slow`, `medium`, `fast`, `ultra` |
+
+> **Sampler note:** Do not set `sampling_method: 'euler_a'` for FLUX.2 models — it will produce random noise. Leave the field unset to let the library auto-select `euler` for flow-matching models.
+
+#### Image-to-image (via `model.run` with `init_image`)
+
+```js
+const inputPng = require('bare-fs').readFileSync('input.png')
+
+const response = await model.run({
+  prompt: 'a photo of a cat in a snowy landscape',
+  init_image: inputPng,
+  strength: 0.75,  // 0.0 = no change, 1.0 = full redraw
+  steps: 20
+})
+```
+
+### 7. Release Resources
+
+```js
+await model.unload()
+```
+
+`unload()` calls `free_sd_ctx` which releases all GPU and CPU memory. The JS object can be safely garbage collected afterwards.
+
+---
+
+## Model File Reference
+
+### FLUX.2 [klein] 4B (recommended for 16 GB machines)
+
+| Role | File | Source |
+|------|------|--------|
+| Diffusion model | `flux-2-klein-4b-Q8_0.gguf` | `leejet/FLUX.2-klein-4B-GGUF` |
+| Text encoder | `Qwen3-4B-Q6_K.gguf` | `unsloth/Qwen3-4B-GGUF` |
+| VAE | `flux2-vae.safetensors` | `Comfy-Org/vae-text-encorder-for-flux-klein-4b` |
+
+> The `qwen_3_4b_fp4_flux2.safetensors` file from the ComfyUI repo **will not work** — FP4 quantisation is NVIDIA-specific and is not supported by ggml.
+
+### Stable Diffusion 1.x / 2.x
+
+Pass an all-in-one checkpoint directly as `modelName`. No separate encoders needed.
+
+---
+
+## FLUX.2 Implementation Notes
+
+This section documents non-obvious issues encountered integrating FLUX.2 [klein] into the addon and how each was resolved. These serve as a reference if the underlying `stable-diffusion.cpp` version is upgraded.
+
+### 1. Metal GPU backend not activated (macOS)
+
+**Symptom:** Generation ran entirely on CPU at 700%+ CPU usage; 20 steps at 512 × 512 never completed.
+
+**Root cause:** The vcpkg overlay port passed `-DGGML_METAL=ON` to CMake, which compiled the ggml Metal library (`libggml-metal.a`). However, `stable-diffusion.cpp` internally guards `ggml_backend_metal_init()` behind its own `SD_USE_METAL` preprocessor define, which is only set when `-DSD_METAL=ON` is passed — a separate flag from `GGML_METAL`.
+
+**Fix:** Changed the portfile (`vcpkg/ports/stable-diffusion-cpp/portfile.cmake`) from:
+
+```cmake
+-DGGML_METAL=${SD_GGML_METAL}
+```
+
+to:
+
+```cmake
+-DSD_METAL=${SD_GGML_METAL}
+```
+
+`-DSD_METAL=ON` causes `stable-diffusion.cpp`'s own `CMakeLists.txt` to set `GGML_METAL=ON` *and* emit `-DSD_USE_METAL`, which activates `ggml_backend_metal_init()` at runtime.
+
+**Verification:** After the fix, CPU usage dropped from ~700% to ~0.5% during generation, confirming the GPU is handling the compute.
+
+---
+
+### 2. Noise output instead of image — wrong prediction type default
+
+**Symptom:** Generation completed all 20 steps and produced a PNG, but the image was pure coloured noise (TV static).
+
+**Root cause:** `SdCtxConfig::prediction` defaulted to `EPS_PRED` (the classic SD1.x epsilon-prediction denoiser). When `SdModel::load()` passed this to `sd_ctx_params_t.prediction`, it overrode `stable-diffusion.cpp`'s auto-detection, forcing the wrong denoiser on a FLUX.2 flow-matching model. The correct sentinel value for auto-detection is `PREDICTION_COUNT`.
+
+**Fix:** Changed the default in `addon/src/handlers/SdCtxHandlers.hpp`:
+
+```cpp
+// Before
+prediction_t prediction = EPS_PRED;
+
+// After
+prediction_t prediction = PREDICTION_COUNT;  // auto-detect from GGUF metadata
+```
+
+---
+
+### 3. Noise output — wrong flow_shift default
+
+**Symptom:** Same noise output as above (compounded with fix 2).
+
+**Root cause:** `SdCtxConfig::flowShift` defaulted to `0.0f`. For FLUX.2, `stable-diffusion.cpp` expects `INFINITY` as the sentinel meaning "use the model's embedded flow-shift value". A value of `0.0f` disabled flow-shifting entirely, breaking the entire noise schedule.
+
+**Fix:**
+
+```cpp
+// Before
+float flowShift = 0.0f;
+
+// After
+float flowShift = std::numeric_limits<float>::infinity();  // use model's embedded value
+```
+
+---
+
+### 4. Wrong sampler default bypassing auto-detection
+
+**Symptom:** Even with fixes 1–3, FLUX.2 output was still garbage: the ancestral Euler sampler was always forced, even when the caller did not request a sampler.
+
+**Root cause:** `SdGenConfig::sampleMethod` defaulted to `EULER_A_SAMPLE_METHOD`. The `generate_image()` function in `stable-diffusion.cpp` only runs its auto-detection (`sd_get_default_sample_method()`) when `sample_method == SAMPLE_METHOD_COUNT`. Since we always passed `EULER_A` explicitly, FLUX.2 (a DiT flow-matching model that needs `EULER`) got the ancestral euler sampler instead, producing garbage.
+
+**Fix:** Changed the default in `addon/src/handlers/SdGenHandlers.hpp`:
+
+```cpp
+// Before
+sample_method_t sampleMethod = EULER_A_SAMPLE_METHOD;
+scheduler_t     scheduler    = DISCRETE_SCHEDULER;
+
+// After
+sample_method_t sampleMethod = SAMPLE_METHOD_COUNT;  // auto (euler for FLUX, euler_a for SD1.x)
+scheduler_t     scheduler    = SCHEDULER_COUNT;      // auto
+```
+
+With these sentinel values, `stable-diffusion.cpp` selects `euler` for DiT/FLUX models and `euler_a` for SD1.x/SD2.x automatically.
+
+---
+
+### 5. Wrong RNG default
+
+**Symptom:** Minor correctness difference vs reference CLI output.
+
+**Root cause:** `SdCtxConfig` defaulted to `rngType = CPU_RNG` (Mersenne Twister). `sd_ctx_params_init()` in `stable-diffusion.cpp` sets `CUDA_RNG` (the philox RNG — named `CUDA_RNG` for historical reasons but not GPU-specific). The philox RNG is the expected default across all platforms.
+
+**Fix:**
+
+```cpp
+// Before
+rng_type_t rngType        = CPU_RNG;
+rng_type_t samplerRngType = CPU_RNG;
+
+// After
+rng_type_t rngType        = CUDA_RNG;       // philox RNG — matches sd_ctx_params_init default
+rng_type_t samplerRngType = RNG_TYPE_COUNT; // auto
+```
+
+---
+
+### Summary of default alignment
+
+The underlying pattern across all these fixes is the same: our C++ config structs had concrete default values that *overrode* `stable-diffusion.cpp`'s own sentinel-based auto-detection. The correct approach is to use the same sentinel values that `sd_ctx_params_init()` and `sd_sample_params_init()` set, and only pass concrete values when the caller explicitly requests them.
+
+| Field | Wrong default | Correct default | Effect of wrong value |
+|-------|--------------|-----------------|----------------------|
+| `prediction` | `EPS_PRED` | `PREDICTION_COUNT` | Forces SD1.x epsilon denoiser on FLUX.2 → noise |
+| `flow_shift` | `0.0f` | `INFINITY` | Disables flow-shifting → broken noise schedule |
+| `sample_method` | `EULER_A_SAMPLE_METHOD` | `SAMPLE_METHOD_COUNT` | Wrong sampler for flow-matching models → noise |
+| `scheduler` | `DISCRETE_SCHEDULER` | `SCHEDULER_COUNT` | Wrong schedule for FLUX.2 |
+| `rng_type` | `CPU_RNG` | `CUDA_RNG` | Different noise seed generation vs reference |
+| `ggml_metal` cmake flag | `-DGGML_METAL=ON` | `-DSD_METAL=ON` | Metal library compiled but never initialised |
+
+---
+
+## License
+
+Apache-2.0 — see [LICENSE](./LICENSE) for details.
diff --git a/packages/lib-infer-diffusion/addon.js b/packages/lib-infer-diffusion/addon.js
new file mode 100644
index 0000000000..4e8198ed1c
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon.js
@@ -0,0 +1,79 @@
+'use strict'
+
+const path = require('bare-path')
+
+/**
+ * JavaScript wrapper around the native stable-diffusion.cpp addon.
+ * Manages the native handle lifecycle and bridges JS ↔ C++.
+ */
+class SdInterface {
+  /**
+   * @param {object} binding - The native addon binding (from require.addon())
+   * @param {object} configurationParams - Configuration for the SD context
+   * @param {string} configurationParams.path - Local file path to the model weights
+   * @param {object} [configurationParams.config] - SD-specific configuration options
+   * @param {Function} outputCb - Called on any generation event (started, progress, output, error)
+   */
+  constructor (binding, configurationParams, outputCb) {
+    this._binding = binding
+
+    if (!configurationParams.config) {
+      configurationParams.config = {}
+    }
+
+    if (!configurationParams.config.backendsDir) {
+      configurationParams.config.backendsDir = path.join(__dirname, 'prebuilds')
+    }
+
+    // C++ getSubmap expects every config value to be a JS string.
+    // Coerce numbers and booleans here so the native layer never sees non-string values.
+    configurationParams.config = Object.fromEntries(
+      Object.entries(configurationParams.config)
+        .filter(([, v]) => v !== undefined)
+        .map(([k, v]) => [k, String(v)])
+    )
+
+    this._handle = this._binding.createInstance(
+      this,
+      configurationParams,
+      outputCb
+    )
+  }
+
+  /**
+   * Moves addon to the LISTENING state after initialization.
+   */
+  async activate () {
+    this._binding.activate(this._handle)
+  }
+
+  /**
+   * Cancel the current generation job.
+   */
+  async cancel () {
+    if (!this._handle) return
+    await this._binding.cancel(this._handle)
+  }
+
+  /**
+   * Run a generation job with the given parameters.
+   * @param {object} params - Generation parameters (will be JSON-serialized)
+   * @returns {Promise<boolean>} true if job was accepted, false if busy
+   */
+  async runJob (params) {
+    const paramsJson = JSON.stringify(params)
+    return this._binding.runJob(this._handle, { type: 'text', input: paramsJson })
+  }
+
+  /**
+   * Destroy the native instance and release all resources.
+   * After this the SdInterface object must not be used.
+   */
+  async unload () {
+    if (!this._handle) return
+    this._binding.destroyInstance(this._handle)
+    this._handle = null
+  }
+}
+
+module.exports = { SdInterface }
diff --git a/packages/lib-infer-diffusion/addon/src/addon/AddonJs.hpp b/packages/lib-infer-diffusion/addon/src/addon/AddonJs.hpp
new file mode 100644
index 0000000000..d4d1f62659
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/addon/AddonJs.hpp
@@ -0,0 +1,129 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <qvac-lib-inference-addon-cpp/JsInterface.hpp>
+#include <qvac-lib-inference-addon-cpp/JsUtils.hpp>
+#include <qvac-lib-inference-addon-cpp/ModelInterfaces.hpp>
+#include <qvac-lib-inference-addon-cpp/addon/AddonJs.hpp>
+#include <qvac-lib-inference-addon-cpp/handlers/JsOutputHandlerImplementations.hpp>
+#include <qvac-lib-inference-addon-cpp/handlers/OutputHandler.hpp>
+#include <qvac-lib-inference-addon-cpp/queue/OutputCallbackJs.hpp>
+
+#include "handlers/SdCtxHandlers.hpp"
+#include "model-interface/SdModel.hpp"
+
+namespace qvac_lib_inference_addon_sd {
+
+inline js_value_t* createInstance(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+  using namespace std;
+
+  JsArgsParser args(env, info);
+
+  // ── Step 1: Extract model file paths from JS args[1] ────────────────────
+  // index.js selects which field to populate based on model family:
+  //   "path"               → model_path (SD1.x / SDXL all-in-one checkpoint)
+  //   "diffusionModelPath" → diffusion_model_path (FLUX.2 [klein] standalone
+  //                          GGUF)
+  // Exactly one of the two will be non-empty; SdModel::load() passes both to
+  // sd_ctx_params_t and the library uses whichever is set.
+  SdCtxConfig config{};
+
+  config.modelPath = args.getMapEntry(1, "path");
+  config.diffusionModelPath = args.getMapEntry(1, "diffusionModelPath");
+  config.clipLPath = args.getMapEntry(1, "clipLPath");
+  config.clipGPath = args.getMapEntry(1, "clipGPath");
+  config.t5XxlPath = args.getMapEntry(1, "t5XxlPath");
+  config.llmPath = args.getMapEntry(1, "llmPath");
+  config.vaePath = args.getMapEntry(1, "vaePath");
+
+  // ── Step 2: Apply SD_CTX_HANDLERS to the "config" sub-object ─────────────
+  // configMap holds the flat key/value pairs from the second constructor arg
+  // (e.g. { threads: "8", flash_attn: "true", ... }).
+  // All values arrive as JS strings (coerced in addon.js).
+  auto configMap = args.getSubmap(1, "config");
+  applySdCtxHandlers(config, configMap);
+
+  // ── Step 3: Construct the model with the fully resolved config ────────────
+  auto model = make_unique<SdModel>(std::move(config));
+
+  // ── Step 4: Register output handlers ─────────────────────────────────────
+  // Progress updates are JSON strings; image frames are uint8 byte arrays.
+  out_handl::OutputHandlers<out_handl::JsOutputHandlerInterface> outHandlers;
+  outHandlers.add(make_shared<out_handl::JsStringOutputHandler>());
+  outHandlers.add(make_shared<out_handl::JsTypedArrayOutputHandler<uint8_t>>());
+
+  unique_ptr<OutputCallBackInterface> callback = make_unique<OutputCallBackJs>(
+      env,
+      args.get(0, "jsHandle"),
+      args.getFunction(2, "outputCallback"),
+      std::move(outHandlers));
+
+  auto addon = make_unique<AddonJs>(env, std::move(callback), std::move(model));
+
+  return JsInterface::createInstance(env, std::move(addon));
+}
+JSCATCH
+
+/**
+ * Queue one image-generation job on an existing addon instance.
+ * Args: [0] instance handle; the job input is read via JsInterface::getInput
+ * and must be of type "text", carrying the generation parameters as a JSON
+ * string. Any other input type raises StatusError(InvalidArgument).
+ * Returns whatever AddonJs::runJob returns for the queued job.
+ */
+inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+  using namespace std;
+
+  JsArgsParser args(env, info);
+  AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance"));
+
+  auto [type, jsInput] = JsInterface::getInput(args);
+
+  if (type != "text")
+    throw StatusError(
+        general_error::InvalidArgument,
+        "stable-diffusion runJob expects a single text input with JSON params");
+
+  const string paramsJson = js::String(env, jsInput).as<std::string>(env);
+
+  SdModel::GenerationJob job;
+  job.paramsJson = paramsJson;
+
+  // NOTE(review): both callbacks capture `instance` by reference — presumably
+  // the AddonJs outlives every queued job; confirm against destroyInstance.
+
+  // Progress updates are queued as JSON strings (JsStringOutputHandler).
+  job.progressCallback = [&instance](const std::string& progressJson) {
+    instance.addonCpp->outputQueue->queueResult(std::any(progressJson));
+  };
+
+  // Image frames are queued as uint8 byte vectors (JsTypedArrayOutputHandler).
+  job.outputCallback = [&instance](const std::vector<uint8_t>& imageBytes) {
+    instance.addonCpp->outputQueue->queueResult(std::any(imageBytes));
+  };
+
+  return instance.runJob(std::any(std::move(job)));
+}
+JSCATCH
+
+/**
+ * Activate the addon by loading the model weights.
+ *
+ * SdModel does not implement IModelAsyncLoad, so AddonCpp::activate() (which
+ * routes through that interface) is bypassed and SdModel::load() is invoked
+ * directly from this call.
+ *
+ * Args: [0] instance handle
+ * Returns JS `undefined`; raises StatusError(InternalError) when the wrapped
+ * model is not an SdModel.
+ */
+inline js_value_t* activate(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+
+  JsArgsParser parser(env, info);
+  AddonJs& addon = JsInterface::getInstance(env, parser.get(0, "instance"));
+
+  // Happy path: the wrapped model really is an SdModel — load and return.
+  if (auto* sd = dynamic_cast<SdModel*>(&addon.addonCpp->model.get())) {
+    sd->load();
+
+    js_value_t* undefinedValue = nullptr;
+    js_get_undefined(env, &undefinedValue);
+    return undefinedValue;
+  }
+
+  throw StatusError(
+      general_error::InternalError, "activate: model is not an SdModel");
+}
+JSCATCH
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.cpp b/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.cpp
new file mode 100644
index 0000000000..838bb9731c
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.cpp
@@ -0,0 +1,275 @@
+#include "SdCtxHandlers.hpp"
+
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+
+#include "utils/LoggingMacros.hpp"
+
+namespace qvac_lib_inference_addon_sd {
+
+using namespace qvac_errors;
+
+// ── Parse helpers
+// ─────────────────────────────────────────────────────────────
+
+// Strict boolean parser for config key `key`: only "true"/"1" and "false"/"0"
+// are accepted; any other spelling raises StatusError(InvalidArgument).
+static bool parseBool(const std::string& v, const std::string& key) {
+  const bool isTrue = (v == "true") || (v == "1");
+  const bool isFalse = (v == "false") || (v == "0");
+  if (!isTrue && !isFalse) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        key + " must be 'true'/'1' or 'false'/'0', got: '" + v + "'");
+  }
+  return isTrue;
+}
+
+// Parses `v` as a base-10 int for config key `key`.
+// The whole string must be consumed: std::stoi on its own stops at the first
+// non-numeric character, so "8abc" or "8.5" would otherwise be accepted as 8.
+// Throws StatusError(InvalidArgument) on malformed or out-of-range input.
+static int parseInt(const std::string& v, const std::string& key) {
+  try {
+    std::size_t consumed = 0;
+    const int parsed = std::stoi(v, &consumed);
+    if (consumed == v.size())
+      return parsed;
+  } catch (...) {
+    // Not a number / out of range — reported by the throw below.
+  }
+  throw StatusError(
+      general_error::InvalidArgument,
+      key + " must be an integer, got: '" + v + "'");
+}
+
+// Parses `v` as a float for config key `key`.
+// The whole string must be consumed: std::stof on its own stops at the first
+// character it cannot use, so "1.5px" would otherwise be accepted as 1.5.
+// Throws StatusError(InvalidArgument) on malformed or out-of-range input.
+static float parseFloat(const std::string& v, const std::string& key) {
+  try {
+    std::size_t consumed = 0;
+    const float parsed = std::stof(v, &consumed);
+    if (consumed == v.size())
+      return parsed;
+  } catch (...) {
+    // Not a number / out of range — reported by the throw below.
+  }
+  throw StatusError(
+      general_error::InvalidArgument,
+      key + " must be a float, got: '" + v + "'");
+}
+
+// ── Handler map
+// ───────────────────────────────────────────────────────────────
+
+// Maps each load-time config key (as sent from JS, values already coerced to
+// strings in addon.js) to a setter that validates the value and stores it in
+// SdCtxConfig. Every enum-style handler reports the offending value on error.
+const SdCtxHandlersMap SD_CTX_HANDLERS = {
+
+    // ── Compute
+    // ────────────────────────────────────────────────────────────────
+
+    {"threads",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.nThreads = parseInt(v, "threads");
+     }},
+
+    // "fa" is the CLI short-form; "flash_attn" is the long-form — both
+    // accepted.
+    {"fa",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.flashAttn = parseBool(v, "fa");
+     }},
+    {"flash_attn",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.flashAttn = parseBool(v, "flash_attn");
+     }},
+    {"diffusion_fa",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.diffusionFlashAttn = parseBool(v, "diffusion_fa");
+     }},
+
+    // ── Memory management
+    // ──────────────────────────────────────────────────────
+
+    {"mmap",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.mmap = parseBool(v, "mmap");
+     }},
+    {"offload_to_cpu",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.offloadToCpu = parseBool(v, "offload_to_cpu");
+     }},
+    {"device", [](SdCtxConfig& c, const std::string& v) { c.device = v; }},
+    {"clip_on_cpu",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.keepClipOnCpu = parseBool(v, "clip_on_cpu");
+     }},
+    {"vae_on_cpu",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.keepVaeOnCpu = parseBool(v, "vae_on_cpu");
+     }},
+
+    // ── Weight precision
+    // ───────────────────────────────────────────────────────
+    // Empty or "auto" selects the SD_TYPE_COUNT sentinel.
+
+    {"type",
+     [](SdCtxConfig& c, const std::string& v) {
+       if (v.empty() || v == "auto")
+         c.wtype = SD_TYPE_COUNT;
+       else if (v == "f32")
+         c.wtype = SD_TYPE_F32;
+       else if (v == "f16")
+         c.wtype = SD_TYPE_F16;
+       else if (v == "bf16")
+         c.wtype = SD_TYPE_BF16;
+       else if (v == "q4_0")
+         c.wtype = SD_TYPE_Q4_0;
+       else if (v == "q4_1")
+         c.wtype = SD_TYPE_Q4_1;
+       else if (v == "q4_k")
+         c.wtype = SD_TYPE_Q4_K;
+       else if (v == "q5_0")
+         c.wtype = SD_TYPE_Q5_0;
+       else if (v == "q5_1")
+         c.wtype = SD_TYPE_Q5_1;
+       else if (v == "q5_k")
+         c.wtype = SD_TYPE_Q5_K;
+       else if (v == "q6_k")
+         c.wtype = SD_TYPE_Q6_K;
+       else if (v == "q8_0")
+         c.wtype = SD_TYPE_Q8_0;
+       else if (v == "q2_k")
+         c.wtype = SD_TYPE_Q2_K;
+       else if (v == "q3_k")
+         c.wtype = SD_TYPE_Q3_K;
+       else
+         throw StatusError(
+             general_error::InvalidArgument,
+             "type: unknown weight type '" + v + "'");
+     }},
+
+    {"tensor_type_rules",
+     [](SdCtxConfig& c, const std::string& v) { c.tensorTypeRules = v; }},
+
+    // ── Sampling RNG
+    // ───────────────────────────────────────────────────────────
+
+    {"rng",
+     [](SdCtxConfig& c, const std::string& v) {
+       if (v == "cpu")
+         c.rngType = CPU_RNG;
+       else if (v == "cuda")
+         c.rngType = CUDA_RNG;
+       else if (v == "std_default")
+         c.rngType = STD_DEFAULT_RNG;
+       else
+         throw StatusError(
+             general_error::InvalidArgument,
+             "rng must be 'cpu', 'cuda', or 'std_default', got: '" + v + "'");
+     }},
+
+    {"sampler_rng",
+     [](SdCtxConfig& c, const std::string& v) {
+       if (v == "cpu")
+         c.samplerRngType = CPU_RNG;
+       else if (v == "cuda")
+         c.samplerRngType = CUDA_RNG;
+       else if (v == "std_default")
+         c.samplerRngType = STD_DEFAULT_RNG;
+       else
+         throw StatusError(
+             general_error::InvalidArgument,
+             "sampler_rng must be 'cpu', 'cuda', or 'std_default', got: '" + v +
+                 "'");
+     }},
+
+    // ── Prediction type
+    // ────────────────────────────────────────────────────────
+    // SD1.x  → "eps"         (epsilon prediction)
+    // SD2.x  → "v"           (v-prediction)
+    // SD3    → "flow"        (flow matching)
+    // FLUX.2 → "flux2_flow"  (FLUX.2 flow matching)
+    // Leave unset (or "auto") to use PREDICTION_COUNT sentinel for
+    // auto-detection.
+
+    {"prediction",
+     [](SdCtxConfig& c, const std::string& v) {
+       if (v.empty() || v == "auto")
+         c.prediction = PREDICTION_COUNT; // sentinel: auto-detect
+       else if (v == "eps")
+         c.prediction = EPS_PRED;
+       else if (v == "v")
+         c.prediction = V_PRED;
+       else if (v == "edm_v")
+         c.prediction = EDM_V_PRED;
+       else if (v == "flow")
+         c.prediction = FLOW_PRED;
+       else if (v == "flux_flow")
+         c.prediction = FLUX_FLOW_PRED;
+       else if (v == "flux2_flow")
+         c.prediction = FLUX2_FLOW_PRED;
+       else
+         // The adjacent literals merge first, so `+ v` appends to one string.
+         throw StatusError(
+             general_error::InvalidArgument,
+             "prediction must be one of: auto, eps, v, edm_v, flow, "
+             "flux_flow, flux2_flow; got: '" +
+                 v + "'");
+     }},
+
+    // ── LoRA apply mode
+    // ────────────────────────────────────────────────────────
+
+    {"lora_apply_mode",
+     [](SdCtxConfig& c, const std::string& v) {
+       if (v == "auto")
+         c.loraApplyMode = LORA_APPLY_AUTO;
+       else if (v == "immediately")
+         c.loraApplyMode = LORA_APPLY_IMMEDIATELY;
+       else if (v == "at_runtime")
+         c.loraApplyMode = LORA_APPLY_AT_RUNTIME;
+       else
+         throw StatusError(
+             general_error::InvalidArgument,
+             "lora_apply_mode must be 'auto', 'immediately', or 'at_runtime', "
+             "got: '" +
+                 v + "'");
+     }},
+
+    // ── Flow matching (FLUX)
+    // ───────────────────────────────────────────────────
+
+    {"flow_shift",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.flowShift = parseFloat(v, "flow_shift");
+     }},
+
+    // ── Convolution optimisations
+    // ──────────────────────────────────────────────
+
+    {"diffusion_conv_direct",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.diffusionConvDirect = parseBool(v, "diffusion_conv_direct");
+     }},
+
+    {"vae_conv_direct",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.vaeConvDirect = parseBool(v, "vae_conv_direct");
+     }},
+
+    // ── SDXL compat
+    // ────────────────────────────────────────────────────────────
+
+    {"force_sdxl_vae_conv_scale",
+     [](SdCtxConfig& c, const std::string& v) {
+       c.forceSDXLVaeConvScale = parseBool(v, "force_sdxl_vae_conv_scale");
+     }},
+
+    // ── Backend loading
+    // ────────────────────────────────────────────────────────────
+
+    {"backendsDir",
+     [](SdCtxConfig& c, const std::string& v) { c.backendsDir = v; }},
+
+    // ── Logging
+    // ────────────────────────────────────────────────────────────────
+    // Does not touch SdCtxConfig: the value is forwarded to the logging module.
+
+    {"verbosity",
+     [](SdCtxConfig& /*c*/, const std::string& v) {
+       std::unordered_map<std::string, std::string> m{{"verbosity", v}};
+       logging::setVerbosityLevel(m);
+     }},
+
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Runs the matching SD_CTX_HANDLERS setter for every entry of `configMap`,
+// writing the parsed values into `config`. Handler exceptions propagate to the
+// caller; keys without a handler are skipped.
+void applySdCtxHandlers(
+    SdCtxConfig& config,
+    const std::unordered_map<std::string, std::string>& configMap) {
+  for (const auto& entry : configMap) {
+    const auto handlerIt = SD_CTX_HANDLERS.find(entry.first);
+    if (handlerIt == SD_CTX_HANDLERS.end()) {
+      // Unknown keys are silently ignored for forward compatibility.
+      continue;
+    }
+    handlerIt->second(config, entry.second);
+  }
+}
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.hpp b/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.hpp
new file mode 100644
index 0000000000..a0e80cdf7c
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/handlers/SdCtxHandlers.hpp
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <functional>
+#include <limits>
+#include <string>
+#include <unordered_map>
+
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+#include <stable-diffusion.h>
+
+namespace qvac_lib_inference_addon_sd {
+
+/**
+ * All load-time configuration for the stable-diffusion context.
+ *
+ * Populated in two steps inside AddonJs::createInstance:
+ *   1. Paths set directly from JS args (path, clipLPath, llmPath, …)
+ *   2. Config options resolved via applySdCtxHandlers(config, configMap)
+ *
+ * Consumed once in SdModel::load() where new_sd_ctx() is called.
+ *
+ * Supported models:
+ *   SD1.x        — uses modelPath (all-in-one .ckpt / .safetensors / GGUF)
+ *   SD2.x        — same as SD1, add prediction="v" to the config
+ *   SDXL         — uses modelPath (all-in-one GGUF); set
+ *                  force_sdxl_vae_conv_scale if needed
+ *   SD3 Medium   — modelPath (all-in-one GGUF: CLIP-L, CLIP-G, T5-XXL baked in)
+ *                  OR diffusionModelPath + clipLPath + clipGPath + t5XxlPath
+ *   FLUX.2 [klein] — uses diffusionModelPath + llmPath (Qwen3) + vaePath
+ */
+struct SdCtxConfig {
+  // ── Model file paths ───────────────────────────────────────────────────────
+  // All paths are absolute; empty string = not used.
+
+  std::string modelPath; // model_path            — SD1.x/SD2.x/SDXL/SD3
+                         // all-in-one checkpoint
+  std::string diffusionModelPath; // diffusion_model_path  — FLUX.2 [klein] or
+                                  // SD3 pure diffusion GGUF
+  std::string clipLPath; // clip_l_path           — CLIP-L text encoder (SD3
+                         // split / SDXL)
+  std::string clipGPath; // clip_g_path           — CLIP-G text encoder (SD3
+                         // split / SDXL)
+  std::string
+      t5XxlPath; // t5xxl_path            — T5-XXL text encoder (SD3 split)
+  std::string
+      llmPath; // llm_path              — LLM text encoder (FLUX.2 → Qwen3)
+  std::string vaePath; // vae_path              — standalone VAE decoder weights
+  std::string taesdPath; // taesd_path            — Tiny AutoEncoder (optional
+                         // fast preview)
+
+  // ── Compute ───────────────────────────────────────────────────────────────
+  int nThreads = -1; // n_threads:            -1 = auto-detect physical cores
+  bool flashAttn = false; // flash_attn:           full-model flash attention
+  bool diffusionFlashAttn =
+      false; // diffusion_flash_attn: flash attention on diffusion only
+
+  // ── Memory management ─────────────────────────────────────────────────────
+  bool mmap = false;         // enable_mmap:           memory-map the GGUF file
+  bool offloadToCpu = false; // offload_params_to_cpu: keep weights in RAM, load
+                             // per-layer to GPU
+  std::string device = "gpu"; // "cpu" or "gpu" — selects compute backend
+  bool keepClipOnCpu =
+      false; // keep_clip_on_cpu:      keep CLIP encoder in CPU RAM
+  bool keepVaeOnCpu =
+      false; // keep_vae_on_cpu:       keep VAE decoder in CPU RAM
+
+  // ── Precision ─────────────────────────────────────────────────────────────
+  sd_type_t wtype =
+      SD_TYPE_COUNT; // global weight type override; COUNT = auto (use GGUF)
+  std::string tensorTypeRules; // per-tensor rules e.g. "^vae.=f16,model.=q8_0"
+
+  // ── Sampling RNG (Random Number Generator) ────────────────────────────────
+  // CUDA_RNG       = philox RNG (default in sd_ctx_params_init; not GPU-specific
+  // despite the name). RNG_TYPE_COUNT = auto for the sampler RNG.
+  rng_type_t rngType = CUDA_RNG;              // rng_type
+  rng_type_t samplerRngType = RNG_TYPE_COUNT; // sampler_rng_type
+
+  // ── Prediction type ───────────────────────────────────────────────────────
+  // PREDICTION_COUNT = auto-detect from model GGUF metadata (recommended).
+  // Override if the GGUF lacks metadata (community conversions often do):
+  //   EPS_PRED        → SD1.x
+  //   V_PRED          → SD2.x
+  //   FLOW_PRED       → SD3 (flow matching)
+  //   FLUX2_FLOW_PRED → FLUX.2 [klein]
+  prediction_t prediction = PREDICTION_COUNT; // auto
+
+  // ── LoRA (Low-Rank Adaptation) apply mode ─────────────────────────────────
+  lora_apply_mode_t loraApplyMode = LORA_APPLY_AUTO; // lora_apply_mode
+
+  // ── Flow matching (FLUX, SD3) ─────────────────────────────────────────────
+  // INFINITY = use the model's embedded flow_shift value (recommended).
+  // Override only to tune noise-schedule quality.
+  float flowShift = std::numeric_limits<float>::infinity();
+
+  // ── Convolution kernel options ────────────────────────────────────────────
+  bool diffusionConvDirect = false; // ggml_conv2d_direct in diffusion model
+  bool vaeConvDirect = false;       // ggml_conv2d_direct in VAE
+
+  // ── SDXL compatibility ────────────────────────────────────────────────────
+  bool forceSDXLVaeConvScale = false; // force SDXL VAE conv scale (compat fix)
+
+  // ── Backend loading ────────────────────────────────────────────────────────
+  std::string backendsDir; // directory containing DL backend .so modules
+
+  // ── Internal ──────────────────────────────────────────────────────────────
+  // Upstream defaults to true, which frees model weight buffers after each
+  // generate_image_internal() call. The addon reuses a single sd_ctx across
+  // multiple generations, so freeing params after the first run causes a
+  // use-after-free SIGSEGV on the second run (including cancel-then-rerun).
+  bool freeParamsImmediately = false;
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Handler for one configMap key: parses the raw string value received from JS
+ * and stores the result in the SdCtxConfig passed by reference.
+ * Throws qvac_errors::StatusError on invalid input.
+ */
+using SdCtxHandlerFn = std::function<void(SdCtxConfig&, const std::string&)>;
+using SdCtxHandlersMap = std::unordered_map<std::string, SdCtxHandlerFn>;
+
+/** Key → handler table for every supported load-time config option. */
+extern const SdCtxHandlersMap SD_CTX_HANDLERS;
+
+/**
+ * Runs the matching SD_CTX_HANDLERS entry for each configMap key, writing into
+ * config. Unknown keys are skipped; handler exceptions reach the caller.
+ */
+void applySdCtxHandlers(
+    SdCtxConfig& config,
+    const std::unordered_map<std::string, std::string>& configMap);
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.cpp b/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.cpp
new file mode 100644
index 0000000000..51bbbf8b33
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.cpp
@@ -0,0 +1,376 @@
+#include "SdGenHandlers.hpp"
+
+#include <charconv>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+
+namespace qvac_lib_inference_addon_sd {
+
+using namespace qvac_errors;
+
+// ── JSON value helpers
+// ────────────────────────────────────────────────────────
+
+static double requireNum(const picojson::value& v, const std::string& key) {
+  // Non-numeric JSON values are rejected; `key` names the field in the
+  // resulting error message.
+  if (v.is<double>()) return v.get<double>();
+  throw StatusError(general_error::InvalidArgument, key + " must be a number");
+}
+
+static std::string
+requireStr(const picojson::value& v, const std::string& key) {
+  // Returns a copy of the JSON string; any other JSON type is rejected with
+  // an InvalidArgument error that names `key`.
+  if (v.is<std::string>()) return v.get<std::string>();
+  throw StatusError(general_error::InvalidArgument, key + " must be a string");
+}
+
+// ── Enum parsers ─────────────────────────────────────────────────────────────
+
+static sample_method_t parseSampler(const std::string& name) {
+  // User-facing sampler names and the library enumerator each one selects.
+  static const std::unordered_map<std::string, sample_method_t> kByName{
+      {"euler", EULER_SAMPLE_METHOD},
+      {"euler_a", EULER_A_SAMPLE_METHOD},
+      {"heun", HEUN_SAMPLE_METHOD},
+      {"dpm2", DPM2_SAMPLE_METHOD},
+      {"dpm++2m", DPMPP2M_SAMPLE_METHOD},
+      {"dpm++2mv2", DPMPP2Mv2_SAMPLE_METHOD},
+      {"dpm++2s_a", DPMPP2S_A_SAMPLE_METHOD},
+      {"lcm", LCM_SAMPLE_METHOD},
+      {"ipndm", IPNDM_SAMPLE_METHOD},
+      {"ipndm_v", IPNDM_V_SAMPLE_METHOD},
+      {"ddim_trailing", DDIM_TRAILING_SAMPLE_METHOD},
+      {"tcd", TCD_SAMPLE_METHOD},
+      {"res_multistep", RES_MULTISTEP_SAMPLE_METHOD},
+      {"res_2s", RES_2S_SAMPLE_METHOD},
+  };
+  const auto match = kByName.find(name);
+  if (match != kByName.end()) return match->second;
+  throw StatusError(
+      general_error::InvalidArgument,
+      "sampling_method: unknown value '" + name +
+          "'. Valid: euler, euler_a, heun, dpm2, dpm++2m, dpm++2mv2, "
+          "dpm++2s_a, lcm, ipndm, ipndm_v, ddim_trailing, tcd, "
+          "res_multistep, res_2s");
+}
+
+static scheduler_t parseScheduler(const std::string& name) {
+  // User-facing scheduler names and the library enumerator each one selects.
+  static const std::unordered_map<std::string, scheduler_t> kByName{
+      {"discrete", DISCRETE_SCHEDULER},
+      {"karras", KARRAS_SCHEDULER},
+      {"exponential", EXPONENTIAL_SCHEDULER},
+      {"ays", AYS_SCHEDULER},
+      {"gits", GITS_SCHEDULER},
+      {"sgm_uniform", SGM_UNIFORM_SCHEDULER},
+      {"simple", SIMPLE_SCHEDULER},
+      {"lcm", LCM_SCHEDULER},
+      {"smoothstep", SMOOTHSTEP_SCHEDULER},
+      {"kl_optimal", KL_OPTIMAL_SCHEDULER},
+      {"bong_tangent", BONG_TANGENT_SCHEDULER},
+  };
+  const auto match = kByName.find(name);
+  if (match != kByName.end()) return match->second;
+  throw StatusError(
+      general_error::InvalidArgument,
+      "scheduler: unknown value '" + name +
+          "'. Valid: discrete, karras, exponential, ays, gits, "
+          "sgm_uniform, simple, lcm, smoothstep, kl_optimal, bong_tangent");
+}
+
+// Parses "vae_tile_size": an integer (applied to both axes) or a "WxH" string
+// (e.g. "128x64"). Each side of the 'x' must be an integer with no leftovers.
+static std::pair<int, int> parseVaeTileSize(const picojson::value& v) {
+  if (v.is<double>()) {
+    int sz = static_cast<int>(v.get<double>());
+    return {sz, sz};
+  }
+  if (!v.is<std::string>()) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "vae_tile_size must be a number or 'WxH' string");
+  }
+
+  const std::string_view s = v.get<std::string>();
+  const auto xPos = s.find('x');
+  if (xPos == std::string_view::npos) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "vae_tile_size string must be 'WxH', got: '" + std::string(s) + "'");
+  }
+
+  const auto parseDim = [](std::string_view tok, int& out) {
+    const char* last = tok.data() + tok.size();
+    const auto res = std::from_chars(tok.data(), last, out);
+    return res.ec == std::errc{} && res.ptr == last;
+  };
+  int w{}, h{};
+  if (!parseDim(s.substr(0, xPos), w) || !parseDim(s.substr(xPos + 1), h)) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "vae_tile_size: could not parse dimensions from '" + std::string(s) +
+            "'");
+  }
+  return {w, h};
+}
+
+static sd_cache_mode_t parseCacheMode(const std::string& name) {
+  // The empty string is accepted as a synonym for "disabled".
+  static const std::unordered_map<std::string, sd_cache_mode_t> kByName{
+      {"", SD_CACHE_DISABLED},
+      {"disabled", SD_CACHE_DISABLED},
+      {"easycache", SD_CACHE_EASYCACHE},
+      {"ucache", SD_CACHE_UCACHE},
+      {"dbcache", SD_CACHE_DBCACHE},
+      {"taylorseer", SD_CACHE_TAYLORSEER},
+      {"cache-dit", SD_CACHE_CACHE_DIT},
+  };
+  const auto match = kByName.find(name);
+  if (match != kByName.end()) return match->second;
+  throw StatusError(
+      general_error::InvalidArgument,
+      "cache_mode: unknown value '" + name +
+          "'. Valid: disabled, easycache, ucache, dbcache, taylorseer, "
+          "cache-dit");
+}
+
+// ── Handler map
+// Maps each per-job JSON key to the lambda that validates and stores it.
+
+const SdGenHandlersMap SD_GEN_HANDLERS = {
+
+    // ── Mode
+    // Accepts exactly "txt2img" or "img2img"; any other string throws.
+
+    {"mode",
+     [](SdGenConfig& c, const picojson::value& v) {
+       const auto mode = requireStr(v, "mode");
+       if (mode != "txt2img" && mode != "img2img")
+         throw StatusError(
+             general_error::InvalidArgument,
+             "mode must be 'txt2img' or 'img2img', got: '" + mode + "'");
+       c.mode = mode;
+     }},
+
+    // ── Prompt
+    // Both prompts must be JSON strings; the text is stored unmodified.
+
+    {"prompt",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.prompt = requireStr(v, "prompt");
+     }},
+    {"negative_prompt",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.negativePrompt = requireStr(v, "negative_prompt");
+     }},
+
+    // ── Image dimensions
+    // NOTE(review): the int cast truncates fractions and is unchecked for range.
+
+    {"width",
+     [](SdGenConfig& c, const picojson::value& v) {
+       int w = static_cast<int>(requireNum(v, "width"));
+       if (w <= 0 || w % 8 != 0)
+         throw StatusError(
+             general_error::InvalidArgument,
+             "width must be a positive multiple of 8, got: " +
+                 std::to_string(w));
+       c.width = w;
+     }},
+
+    {"height",
+     [](SdGenConfig& c, const picojson::value& v) {
+       int h = static_cast<int>(requireNum(v, "height"));
+       if (h <= 0 || h % 8 != 0)
+         throw StatusError(
+             general_error::InvalidArgument,
+             "height must be a positive multiple of 8, got: " +
+                 std::to_string(h));
+       c.height = h;
+     }},
+
+    // ── Sampling
+    // steps must be > 0; sampler/scheduler names are mapped to library enums.
+
+    {"steps",
+     [](SdGenConfig& c, const picojson::value& v) {
+       int s = static_cast<int>(requireNum(v, "steps"));
+       if (s <= 0)
+         throw StatusError(general_error::InvalidArgument, "steps must be > 0");
+       c.steps = s;
+     }},
+
+    // "sampling_method" and "sampler" are aliases; the one applied last wins.
+    {"sampling_method",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.sampleMethod = parseSampler(requireStr(v, "sampling_method"));
+     }},
+    {"sampler",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.sampleMethod = parseSampler(requireStr(v, "sampler"));
+     }},
+
+    {"scheduler",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.scheduler = parseScheduler(requireStr(v, "scheduler"));
+     }},
+
+    {"eta",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.eta = static_cast<float>(requireNum(v, "eta"));
+     }},
+
+    // ── Guidance
+    // Plain number → float conversions; no range checks in this group.
+
+    {"cfg_scale",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.cfgScale = static_cast<float>(requireNum(v, "cfg_scale"));
+     }},
+
+    // distilled_guidance — FLUX.2 specific; separate from cfg_scale.
+    // Default 3.5 is the FLUX recommendation. Too low = washed out, too high =
+    // over-saturated.
+    {"guidance",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.guidance = static_cast<float>(requireNum(v, "guidance"));
+     }},
+
+    // img_cfg — image guidance for img2img / inpaint workflows; -1 = use
+    // cfg_scale.
+    {"img_cfg_scale",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.imgCfgScale = static_cast<float>(requireNum(v, "img_cfg_scale"));
+     }},
+
+    // ── Reproducibility
+    // The JSON number is cast to int64_t; no range check is applied.
+
+    {"seed",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.seed = static_cast<int64_t>(requireNum(v, "seed"));
+     }},
+
+    // ── Batching
+    // batch_count must be > 0 after truncation to int.
+
+    {"batch_count",
+     [](SdGenConfig& c, const picojson::value& v) {
+       int b = static_cast<int>(requireNum(v, "batch_count"));
+       if (b <= 0)
+         throw StatusError(
+             general_error::InvalidArgument, "batch_count must be > 0");
+       c.batchCount = b;
+     }},
+
+    // ── img2img
+    // strength is validated to lie in the closed interval [0, 1].
+
+    {"strength",
+     [](SdGenConfig& c, const picojson::value& v) {
+       float s = static_cast<float>(requireNum(v, "strength"));
+       if (s < 0.0f || s > 1.0f)
+         throw StatusError(
+             general_error::InvalidArgument,
+             "strength must be in [0, 1], got: " + std::to_string(s));
+       c.strength = s;
+     }},
+
+    // clip_skip — skip last N CLIP layers. Used by SD1.x / SD2.x fine-tunes.
+    // -1 = auto (1 for SD1, 2 for SD2). Ignored for FLUX.
+    {"clip_skip",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.clipSkip = static_cast<int>(requireNum(v, "clip_skip"));
+     }},
+
+    // ── VAE tiling
+    // vae_tiling must be a JSON boolean; tile size and overlap follow below.
+
+    {"vae_tiling",
+     [](SdGenConfig& c, const picojson::value& v) {
+       if (!v.is<bool>())
+         throw StatusError(
+             general_error::InvalidArgument, "vae_tiling must be a boolean");
+       c.vaeTiling = v.get<bool>();
+     }},
+
+    // vae_tile_size accepts either an integer (applied to both axes) or "WxH"
+    // string.
+    {"vae_tile_size",
+     [](SdGenConfig& c, const picojson::value& v) {
+       auto [w, h] = parseVaeTileSize(v);
+       c.vaeTileSizeX = w;
+       c.vaeTileSizeY = h;
+     }},
+
+    {"vae_tile_overlap",
+     [](SdGenConfig& c, const picojson::value& v) {
+       float overlap = static_cast<float>(requireNum(v, "vae_tile_overlap"));
+       if (overlap < 0.0f || overlap >= 1.0f)
+         throw StatusError(
+             general_error::InvalidArgument,
+             "vae_tile_overlap must be in [0, 1), got: " +
+                 std::to_string(overlap));
+       c.vaeTileOverlap = overlap;
+     }},
+
+    // ── Step-caching
+    // These keys overwrite each other's fields; the handler applied last wins.
+    // cache_mode selects the algorithm. cache_preset is a convenience shorthand
+    // that sets both the mode and sensible threshold defaults.
+
+    {"cache_mode",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.cacheMode = parseCacheMode(requireStr(v, "cache_mode"));
+     }},
+
+    // cache_preset — shorthand for "easycache + threshold".
+    {"cache_preset",
+     [](SdGenConfig& c, const picojson::value& v) {
+       // Approximate threshold values mirroring the stable-diffusion.cpp CLI
+       // presets:  slow ≈ 0.60 (~10% speed-up)  medium ≈ 0.40 (~25%)
+       //           fast ≈ 0.25 (~40%)            ultra  ≈ 0.15 (fastest)
+       using Preset = std::pair<sd_cache_mode_t, float>;
+       static const std::unordered_map<std::string, Preset> presets{
+           {"slow", {SD_CACHE_EASYCACHE, 0.60f}},
+           {"medium", {SD_CACHE_EASYCACHE, 0.40f}},
+           {"fast", {SD_CACHE_EASYCACHE, 0.25f}},
+           {"ultra", {SD_CACHE_EASYCACHE, 0.15f}},
+       };
+       const auto preset = requireStr(v, "cache_preset");
+       if (auto it = presets.find(preset); it != presets.end()) {
+         c.cacheMode = it->second.first;
+         c.cacheThreshold = it->second.second;
+       } else {
+         throw StatusError(
+             general_error::InvalidArgument,
+             "cache_preset must be 'slow', 'medium', 'fast', or 'ultra'");
+       }
+     }},
+
+    // cache_threshold — direct override for reuse_threshold; 0 = library
+    // default.
+    {"cache_threshold",
+     [](SdGenConfig& c, const picojson::value& v) {
+       c.cacheThreshold = static_cast<float>(requireNum(v, "cache_threshold"));
+     }},
+
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+
+void applySdGenHandlers(SdGenConfig& config, const picojson::object& obj) {
+  // Keys without a registered handler are skipped (forward compatibility).
+  for (const auto& param : obj) {
+    const auto handler = SD_GEN_HANDLERS.find(param.first);
+    if (handler == SD_GEN_HANDLERS.end()) continue;
+    handler->second(config, param.second);
+  }
+}
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.hpp b/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.hpp
new file mode 100644
index 0000000000..c234ea2c0e
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/handlers/SdGenHandlers.hpp
@@ -0,0 +1,100 @@
+#pragma once
+
+#include <functional>
+#include <string>
+#include <unordered_map>
+
+#include <picojson/picojson.h>
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+#include <stable-diffusion.h>
+
+namespace qvac_lib_inference_addon_sd {
+
+/**
+ * All per-job generation parameters for a single txt2img or img2img call.
+ *
+ * Populated by applySdGenHandlers() inside SdModel::process(), then mapped
+ * to sd_img_gen_params_t before generate_image() is called.
+ *
+ * txt2vid (video) is intentionally unsupported.
+ */
+struct SdGenConfig {
+
+  // ── Mode ──────────────────────────────────────────────────────────────────
+  std::string mode = "txt2img"; // "txt2img" (default) or "img2img"
+
+  // ── Prompt ────────────────────────────────────────────────────────────────
+  std::string prompt;
+  std::string negativePrompt;
+
+  // ── Image dimensions ─────────────────────────────────────────────────────
+  int width = 512; // must be a positive multiple of 8
+  int height = 512; // must be a positive multiple of 8
+
+  // ── Sampling ──────────────────────────────────────────────────────────────
+  // SAMPLE_METHOD_COUNT / SCHEDULER_COUNT = "auto" — stable-diffusion.cpp
+  // selects the correct default for each model family at runtime:
+  //   DiT / FLUX → euler + karras   SD1/SD2 → euler_a + discrete
+  int steps = 20;
+  sample_method_t sampleMethod = SAMPLE_METHOD_COUNT; // auto
+  scheduler_t scheduler = SCHEDULER_COUNT;            // auto
+  float eta = 0.0f; // stochasticity for DDIM / TCD samplers
+
+  // ── Guidance ─────────────────────────────────────────────────────────────
+  float cfgScale =
+      7.0f; // txt_cfg  — CFG (Classifier-Free Guidance) for SD1/SD2
+  float guidance = 3.5f; // distilled_guidance — FLUX.2 flow-matching scale
+  float imgCfgScale =
+      -1.0f; // img_cfg  — image guidance for img2img/inpaint; -1 = cfgScale
+
+  // ── Reproducibility ───────────────────────────────────────────────────────
+  int64_t seed = -1; // -1 = random
+
+  // ── Batching ──────────────────────────────────────────────────────────────
+  int batchCount = 1;
+
+  // ── img2img / inpaint ─────────────────────────────────────────────────────
+  float strength = 0.75f; // denoising strength: 0 = keep init, 1 = ignore it
+  int clipSkip =
+      -1; // skip last N CLIP encoder layers (SD1.x / SD2.x); -1 = auto
+
+  // ── VAE tiling — required for images > ~768px on 16 GB machines ──────────
+  // Maps to sd_img_gen_params_t.vae_tiling_params
+  bool vaeTiling = false;
+  int vaeTileSizeX = 512;      // tile width  in pixels
+  int vaeTileSizeY = 512;      // tile height in pixels
+  float vaeTileOverlap = 0.5f; // fraction of tile used as overlap seam, [0, 1)
+
+  // ── Step-caching (cuts FLUX generation time by 30–50%) ───────────────────
+  // Maps to sd_img_gen_params_t.cache
+  // cache_mode: "disabled", "easycache" (DiT), "ucache" (UNet), "dbcache",
+  //             "taylorseer", "cache-dit"
+  // cache_preset: "slow", "medium", "fast", "ultra" (shorthand for threshold)
+  // cache_threshold: direct override for reuse_threshold (0 = library default)
+  sd_cache_mode_t cacheMode = SD_CACHE_DISABLED;
+  float cacheThreshold = 0.0f; // reuse_threshold; 0 = use library default
+  float cacheStart = 0.0f;     // start_percent;   0 = use library default
+  float cacheEnd = 0.0f;       // end_percent;     0 = use library default
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Handler for one per-job JSON key: validates the raw picojson::value and
+ * stores the result in the SdGenConfig passed by reference.
+ * Throws qvac_errors::StatusError on invalid input.
+ */
+using SdGenHandlerFn =
+    std::function<void(SdGenConfig&, const picojson::value&)>;
+using SdGenHandlersMap = std::unordered_map<std::string, SdGenHandlerFn>;
+
+/** Key → handler table for every supported per-job generation parameter. */
+extern const SdGenHandlersMap SD_GEN_HANDLERS;
+
+/**
+ * Runs the matching SD_GEN_HANDLERS entry for each key of a parsed JSON params
+ * object. Unknown keys are skipped; handler exceptions reach the caller.
+ */
+void applySdGenHandlers(SdGenConfig& config, const picojson::object& obj);
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/js-interface/binding.cpp b/packages/lib-infer-diffusion/addon/src/js-interface/binding.cpp
new file mode 100644
index 0000000000..9bd594aeec
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/js-interface/binding.cpp
@@ -0,0 +1,33 @@
+#include <bare.h>
+
+#include "../addon/AddonJs.hpp"
+
+js_value_t* qvacLibInferenceAddonSdExports(js_env_t* env, js_value_t* exports) {
+  // Attaches each native entry point to `exports` as a named JS function.
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define V(name, fn)                                                            \
+  {                                                                            \
+    js_value_t* val;                                                           \
+    if (js_create_function(env, name, -1, fn, nullptr, &val) != 0) {           \
+      return nullptr;                                                          \
+    }                                                                          \
+    if (js_set_named_property(env, exports, name, val) != 0) {                 \
+      return nullptr;                                                          \
+    }                                                                          \
+  }
+  // V(name, fn) makes this function return nullptr if either JS call fails.
+  V("createInstance", qvac_lib_inference_addon_sd::createInstance)
+  V("runJob", qvac_lib_inference_addon_sd::runJob)
+  // activate is addon-specific; the rest are qvac_lib_inference_addon_cpp's.
+  V("activate", qvac_lib_inference_addon_sd::activate)
+  V("cancel", qvac_lib_inference_addon_cpp::JsInterface::cancel)
+  V("destroyInstance",
+    qvac_lib_inference_addon_cpp::JsInterface::destroyInstance)
+  V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger)
+  V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger)
+
+#undef V
+  return exports;
+}
+
+BARE_MODULE(qvac_lib_inference_addon_sd, qvacLibInferenceAddonSdExports)
diff --git a/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.cpp b/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.cpp
new file mode 100644
index 0000000000..060aab4f04
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.cpp
@@ -0,0 +1,675 @@
+#include "SdModel.hpp"
+
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <sstream>
+#include <system_error>
+#include <vector>
+
+#define STB_IMAGE_IMPLEMENTATION
+#include <stb_image.h>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <ggml-backend.h>
+#include <picojson/picojson.h>
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+#include <qvac-lib-inference-addon-cpp/Logger.hpp>
+#include <stb_image_write.h>
+
+#include "utils/BackendSelection.hpp"
+#include "utils/LoggingMacros.hpp"
+
+using namespace qvac_lib_inference_addon_cpp;
+using namespace qvac_errors;
+
+// ---------------------------------------------------------------------------
+// Thread-local progress context — sd progress callbacks are process-global,
+// so we park the current job pointer in TLS to route progress back.
+// ---------------------------------------------------------------------------
+namespace {
+
+struct ProgressCtx {
+  const SdModel::GenerationJob* job = nullptr; // non-owning; null when idle
+  std::chrono::steady_clock::time_point startTime; // set when a job begins
+};
+// Per-thread slot that sdProgressCallback() reads the current job from.
+thread_local ProgressCtx tl_progressCtx;
+// Thread-local model pointer for abort callback routing — same pattern as
+// tl_progressCtx for progress.  Avoids relying on the process-global
+// sd_abort_cb_data when multiple SdModel instances could coexist.
+thread_local const SdModel* tl_abortModel = nullptr;
+
+std::string backendDeviceTypeToString(enum ggml_backend_dev_type type) {
+  // Label for a ggml device type; anything unrecognised maps to "UNKNOWN".
+  const char* label = "UNKNOWN";
+  if (type == GGML_BACKEND_DEVICE_TYPE_CPU) {
+    label = "CPU";
+  } else if (type == GGML_BACKEND_DEVICE_TYPE_GPU) {
+    label = "GPU";
+  } else if (type == GGML_BACKEND_DEVICE_TYPE_IGPU) {
+    label = "IGPU";
+  } else if (type == GGML_BACKEND_DEVICE_TYPE_ACCEL) {
+    label = "ACCEL";
+  }
+  return label;
+}
+
+std::string preferredBackendToString(enum sd_backend_preference_t pref) {
+  // Lower-case name of an SD backend preference, or "unknown" otherwise.
+  const char* name = "unknown";
+  if (pref == SD_BACKEND_PREF_AUTO) {
+    name = "auto";
+  } else if (pref == SD_BACKEND_PREF_CPU) {
+    name = "cpu";
+  } else if (pref == SD_BACKEND_PREF_GPU) {
+    name = "gpu";
+  } else if (pref == SD_BACKEND_PREF_OPENCL) {
+    name = "opencl";
+  }
+  return name;
+}
+
+void logBackendRegistrySnapshot() {
+  // Emits one INFO line summarising the ggml backend registry, then one line
+  // per registry entry and per device (WARNING for a null device handle).
+  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
+  const auto orNull = [](const char* s) { return s ? s : "<null>"; };
+
+  const size_t regCount = ggml_backend_reg_count();
+  const size_t devCount = ggml_backend_dev_count();
+  std::ostringstream summary;
+  summary << "GGML backend registry snapshot: " << regCount
+          << " registry entries, " << devCount << " devices";
+  QLOG_IF(Priority::INFO, summary.str());
+
+  for (size_t k = 0; k < regCount; ++k) {
+    ggml_backend_reg_t reg = ggml_backend_reg_get(k);
+    const char* regName = reg ? ggml_backend_reg_name(reg) : nullptr;
+    const size_t regDevCount = reg ? ggml_backend_reg_dev_count(reg) : 0;
+
+    std::ostringstream line;
+    line << "GGML backend registry[" << k << "]: name='" << orNull(regName)
+         << "', devices=" << regDevCount;
+    QLOG_IF(Priority::INFO, line.str());
+  }
+
+  for (size_t k = 0; k < devCount; ++k) {
+    ggml_backend_dev_t dev = ggml_backend_dev_get(k);
+    if (!dev) {
+      QLOG_IF(
+          Priority::WARNING,
+          "GGML backend device[" + std::to_string(k) + "]: null device handle");
+      continue;
+    }
+
+    const char* devName = ggml_backend_dev_name(dev);
+    const char* devDesc = ggml_backend_dev_description(dev);
+    const auto devType = ggml_backend_dev_type(dev);
+    size_t memFree = 0;
+    size_t memTotal = 0;
+    ggml_backend_dev_memory(dev, &memFree, &memTotal);
+    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+    const char* regName = reg ? ggml_backend_reg_name(reg) : nullptr;
+
+    std::ostringstream line;
+    line << "GGML backend device[" << k << "]: name='" << orNull(devName)
+         << "', desc='" << orNull(devDesc)
+         << "', type=" << backendDeviceTypeToString(devType) << ", reg='"
+         << orNull(regName) << "', mem_free=" << memFree
+         << ", mem_total=" << memTotal;
+    QLOG_IF(Priority::INFO, line.str());
+  }
+}
+
+// Logs whether `backendsDirPath` exists, whether it is a directory, and which
+// libqvac-diffusion-ggml-*.so modules it contains. Diagnostics only.
+void logBackendModulePathSnapshot(
+    const std::filesystem::path& backendsDirPath) {
+  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
+
+  std::error_code ec;
+  const bool exists = std::filesystem::exists(backendsDirPath, ec);
+  QLOG_IF(
+      Priority::INFO,
+      "Backend module path exists=" + std::string(exists ? "true" : "false") +
+          " path='" + backendsDirPath.string() + "'");
+  if (ec) {
+    QLOG_IF(
+        Priority::WARNING,
+        "Backend module path existence check error: " + ec.message());
+    return;
+  }
+  if (!exists) {
+    return;
+  }
+
+  const bool isDir = std::filesystem::is_directory(backendsDirPath, ec);
+  QLOG_IF(
+      Priority::INFO,
+      "Backend module path is_directory=" +
+          std::string(isDir ? "true" : "false"));
+  if (ec || !isDir) {
+    if (ec) {
+      QLOG_IF(
+          Priority::WARNING,
+          "Backend module path type check error: " + ec.message());
+    }
+    return;
+  }
+
+  // Advance with increment(ec): a range-for uses the throwing operator++, and
+  // a failed construction yields an end iterator, hiding `ec` from the loop.
+  std::vector<std::string> entries;
+  std::filesystem::directory_iterator it(backendsDirPath, ec);
+  for (; !ec && it != std::filesystem::directory_iterator{}; it.increment(ec)) {
+    const auto filename = it->path().filename().string();
+    if (filename.rfind("libqvac-diffusion-ggml-", 0) == 0 &&
+        it->path().extension() == ".so") {
+      entries.push_back(filename);
+    }
+  }
+  if (ec) {
+    QLOG_IF(
+        Priority::WARNING,
+        "Backend module path iteration error: " + ec.message());
+  }
+
+  if (entries.empty()) {
+    QLOG_IF(
+        Priority::WARNING,
+        "No qvac diffusion GGML backend modules found under: " +
+            backendsDirPath.string());
+    return;
+  }
+
+  std::ostringstream oss;
+  for (size_t i = 0; i < entries.size(); ++i) {
+    oss << (i > 0 ? ", " : "") << entries[i];
+  }
+  QLOG_IF(
+      Priority::INFO,
+      "Detected qvac diffusion GGML backend modules: " + oss.str());
+}
+
+void sdProgressCallback(int step, int steps, float /*time*/, void* /*data*/) {
+  // Sends {"step","total","elapsed_ms"} JSON to this thread's current job;
+  // does nothing when no job or no progressCallback is registered.
+  const SdModel::GenerationJob* job = tl_progressCtx.job;
+  if (job == nullptr || !job->progressCallback) {
+    return;
+  }
+  const auto sinceStart =
+      std::chrono::steady_clock::now() - tl_progressCtx.startTime;
+  const auto ms =
+      std::chrono::duration_cast<std::chrono::milliseconds>(sinceStart).count();
+  job->progressCallback(
+      R"({"step":)" + std::to_string(step) + R"(,"total":)" +
+      std::to_string(steps) + R"(,"elapsed_ms":)" + std::to_string(ms) + "}");
+}
+
+// Abort hook installed through sd_set_abort_callback() so generate_image()
+// can be interrupted mid-denoising. The opaque `data` argument is ignored:
+// the model to query comes from the thread-local tl_abortModel, which avoids
+// concurrency issues when multiple SdModel instances coexist.
+bool sdAbortCallback(void* /*data*/) {
+  return tl_abortModel != nullptr ? tl_abortModel->isCancelRequested() : false;
+}
+
+// RAII owner of the sd_image_t* array returned by generate_image(): frees
+// each image's pixel buffer and the array itself on destruction, even if an
+// exception is thrown mid-iteration (e.g. in encodeToPng or outputCallback).
+// Call release(i) after processing image i to free its pixels immediately.
+// A null array is treated as an empty batch (count() == 0).
+class SdImageBatch {
+public:
+  SdImageBatch(sd_image_t* data, int count)
+      : data_(data), count_(data != nullptr && count > 0 ? count : 0) {}
+  ~SdImageBatch() {
+    for (int i = 0; i < count_; ++i) {
+      free(data_[i].data);
+    }
+    free(data_);
+  }
+
+  SdImageBatch(const SdImageBatch&) = delete;
+  SdImageBatch& operator=(const SdImageBatch&) = delete;
+  SdImageBatch(SdImageBatch&&) = delete;
+  SdImageBatch& operator=(SdImageBatch&&) = delete;
+
+  [[nodiscard]] int count() const { return count_; }
+  [[nodiscard]] const sd_image_t& operator[](int i) const { return data_[i]; }
+
+  void release(int i) {
+    free(data_[i].data);
+    data_[i].data = nullptr;
+  }
+
+private:
+  sd_image_t* const data_;
+  const int count_;
+};
+
+} // namespace
+
+// ---------------------------------------------------------------------------
+// Constructor — stores config, allocates nothing
+// ---------------------------------------------------------------------------
+
+SdModel::SdModel(qvac_lib_inference_addon_sd::SdCtxConfig config)
+    : config_(std::move(config)), sdCtx_(nullptr, &free_sd_ctx) {
+  // Route stable-diffusion log output through sdLogCallback (no user data).
+  sd_set_log_callback(SdModel::sdLogCallback, nullptr);
+}
+
+// ---------------------------------------------------------------------------
+// Destructor — releases the sd_ctx and all associated GPU/CPU memory
+// ---------------------------------------------------------------------------
+
+SdModel::~SdModel() = default;
+
+// ---------------------------------------------------------------------------
+// load() — maps SdCtxConfig → sd_ctx_params_t, then calls new_sd_ctx()
+// ---------------------------------------------------------------------------
+
+void SdModel::load() {
+  if (isLoaded())
+    return;
+
+  const auto tLoadStart = std::chrono::steady_clock::now();
+
+  sd_ctx_params_t params{};
+  sd_ctx_params_init(&params);
+
+  // ── Model paths ────────────────────────────────────────────────────────────
+  // For FLUX.2 [klein] the GGUF contains only diffusion weights with no SD
+  // version metadata KV pairs, so we must use diffusion_model_path.
+  // Classic all-in-one SD1.x / SDXL checkpoints use model_path.
+  auto optPath = [](const std::string& s) -> const char* {
+    return s.empty() ? nullptr : s.c_str();
+  };
+  params.model_path = optPath(config_.modelPath);
+  params.diffusion_model_path = optPath(config_.diffusionModelPath);
+  params.clip_l_path = optPath(config_.clipLPath);
+  params.clip_g_path = optPath(config_.clipGPath);
+  params.t5xxl_path = optPath(config_.t5XxlPath);
+  params.llm_path = optPath(config_.llmPath);
+  params.vae_path = optPath(config_.vaePath);
+  params.taesd_path = optPath(config_.taesdPath);
+
+  // ── Compute ────────────────────────────────────────────────────────────────
+  params.n_threads = config_.nThreads;
+  params.flash_attn = config_.flashAttn;
+  params.diffusion_flash_attn = config_.diffusionFlashAttn;
+
+  // Load DL GPU backend modules once per process before probing devices /
+  // creating the SD context (GGML_BACKEND_DL: no devices are enumerated until
+  // then). NOTE(review): the static flag is unsynchronised — confirm callers.
+#ifdef GGML_BACKEND_DL
+  {
+    static bool backendsLoaded = false;
+    if (!backendsLoaded) {
+      using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
+      if (!config_.backendsDir.empty()) {
+        std::filesystem::path backendsDirPath(config_.backendsDir);
+#ifdef BACKENDS_SUBDIR
+        backendsDirPath = backendsDirPath / BACKENDS_SUBDIR;
+        backendsDirPath = backendsDirPath.lexically_normal();
+#endif
+        QLOG_IF(
+            Priority::INFO,
+            "Loading GPU backends from: " + backendsDirPath.string());
+        logBackendModulePathSnapshot(backendsDirPath);
+        ggml_backend_load_all_from_path(backendsDirPath.string().c_str());
+      } else {
+        QLOG_IF(Priority::INFO, "Loading GPU backends from default path");
+        ggml_backend_load_all();
+      }
+      backendsLoaded = true;
+      logBackendRegistrySnapshot();
+    }
+  }
+#endif
+
+  // ── Memory management ─────────────────────────────────────────────────────
+  params.enable_mmap = config_.mmap;
+  params.offload_params_to_cpu = config_.offloadToCpu;
+
+  // Resolve the effective backend from GPU capabilities: Adreno 800+ uses GPU
+  // (OpenCL), Adreno 600/700 is forced to CPU, everything else uses GPU
+  // (Vulkan). Any config_.device other than "cpu" counts as a GPU preference.
+  auto preferredDevice = config_.device == "cpu"
+                             ? sd_backend_selection::BackendDevice::CPU
+                             : sd_backend_selection::BackendDevice::GPU;
+  auto effectiveDevice =
+      sd_backend_selection::resolveBackendForDevice(preferredDevice);
+  const bool preferOpenClForAdreno =
+      sd_backend_selection::shouldPreferOpenClForAdreno(preferredDevice);
+
+  if (effectiveDevice == sd_backend_selection::BackendDevice::CPU) {
+    params.preferred_gpu_backend = SD_BACKEND_PREF_CPU;
+  } else if (preferOpenClForAdreno) {
+    params.preferred_gpu_backend = SD_BACKEND_PREF_OPENCL;
+  } else {
+    params.preferred_gpu_backend = SD_BACKEND_PREF_GPU;
+  }
+
+  QLOG_IF(
+      qvac_lib_inference_addon_cpp::logger::Priority::INFO,
+      "Preferred backend passed to stable-diffusion: " +
+          preferredBackendToString(params.preferred_gpu_backend) + " (" +
+          std::to_string(static_cast<int>(params.preferred_gpu_backend)) + ")");
+
+#if defined(__APPLE__)
+  // The ggml Metal backend does not fully support GGML_OP_NORM for
+  // non-contiguous tensors (the CLIP text encoder hits this path).
+  // Force CLIP to CPU on Apple to avoid a Metal encoder abort.
+  params.keep_clip_on_cpu = true;
+#else
+  params.keep_clip_on_cpu = config_.keepClipOnCpu;
+#endif
+  params.keep_vae_on_cpu = config_.keepVaeOnCpu;
+
+  // ── Precision ─────────────────────────────────────────────────────────────
+  params.wtype = config_.wtype;
+  params.tensor_type_rules = config_.tensorTypeRules.empty()
+                                 ? nullptr
+                                 : config_.tensorTypeRules.c_str();
+
+  // ── Sampling RNG ──────────────────────────────────────────────────────────
+  params.rng_type = config_.rngType;
+  params.sampler_rng_type = config_.samplerRngType;
+
+  // ── Prediction type / LoRA ────────────────────────────────────────────────
+  params.prediction = config_.prediction;
+  params.lora_apply_mode = config_.loraApplyMode;
+
+  // ── Convolution options ───────────────────────────────────────────────────
+  params.diffusion_conv_direct = config_.diffusionConvDirect;
+  params.vae_conv_direct = config_.vaeConvDirect;
+  params.force_sdxl_vae_conv_scale = config_.forceSDXLVaeConvScale;
+
+  // ── Internal ──────────────────────────────────────────────────────────────
+  params.free_params_immediately = config_.freeParamsImmediately;
+
+  sd_ctx_t* raw = new_sd_ctx(&params);
+  if (!raw) {
+    const std::string path = config_.diffusionModelPath.empty()
+                                 ? config_.modelPath
+                                 : config_.diffusionModelPath;
+    throw StatusError(
+        general_error::InternalError,
+        "SdModel::load() failed — could not create stable-diffusion context. "
+        "Check model path and format: " +
+            path);
+  }
+
+  sdCtx_.reset(raw);
+
+  stats_.modelLoadMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::steady_clock::now() - tLoadStart)
+                           .count();
+}
+
+// ---------------------------------------------------------------------------
+// process() — applies SdGenHandlers to JSON params, then calls generate_image
+// ---------------------------------------------------------------------------
+
+std::any SdModel::process(const std::any& input) {
+  // Runs one txt2img / img2img job: parses job.paramsJson, calls
+  // generate_image() and hands each encoded PNG to job.outputCallback.
+  // Returns an empty std::any. Throws StatusError (model not loaded, bad
+  // params, failed generation) or std::runtime_error when cancelled.
+  if (!isLoaded()) {
+    throw StatusError(
+        general_error::InternalError,
+        "SdModel::process() called before load()");
+  }
+
+  const auto& job = std::any_cast<const GenerationJob&>(input);
+
+  cancelRequested_.store(false);
+  tl_progressCtx.job = &job;
+  tl_progressCtx.startTime = std::chrono::steady_clock::now();
+  sd_set_progress_callback(sdProgressCallback, nullptr);
+  tl_abortModel = this;
+  sd_set_abort_callback(sdAbortCallback, nullptr);
+
+  // Scope guard: clear process-global callbacks on any exit path (including
+  // early exceptions from parsing/validation before generate_image runs).
+  auto clearCallbacks = [&]() {
+    tl_progressCtx.job = nullptr;
+    tl_abortModel = nullptr;
+    sd_set_progress_callback(nullptr, nullptr);
+    sd_set_abort_callback(nullptr, nullptr);
+  };
+  struct CallbackGuard {
+    std::function<void()> fn;
+    ~CallbackGuard() { fn(); }
+  } guard{clearCallbacks};
+
+  // ── Parse JSON params ─────────────────────────────────────────────────────
+  picojson::value v;
+  const std::string parseErr = picojson::parse(v, job.paramsJson);
+  if (!parseErr.empty())
+    throw StatusError(
+        general_error::InvalidArgument,
+        "Failed to parse generation params JSON: " + parseErr);
+  if (!v.is<picojson::object>())
+    throw StatusError(
+        general_error::InvalidArgument, "Params must be a JSON object");
+
+  // ── Build SdGenConfig from handlers ───────────────────────────────────────
+  qvac_lib_inference_addon_sd::SdGenConfig gen{};
+  qvac_lib_inference_addon_sd::applySdGenHandlers(
+      gen, v.get<picojson::object>());
+
+  if (gen.mode != "txt2img" && gen.mode != "img2img")
+    throw StatusError(
+        general_error::InvalidArgument,
+        "Unsupported mode: '" + gen.mode +
+            "'. Only txt2img and img2img are supported.");
+
+  // ── Build sd_img_gen_params_t ─────────────────────────────────────────────
+  sd_img_gen_params_t genParams{};
+  sd_img_gen_params_init(&genParams);
+
+  genParams.prompt = gen.prompt.c_str();
+  genParams.negative_prompt = gen.negativePrompt.c_str();
+  genParams.width = gen.width;
+  genParams.height = gen.height;
+  genParams.seed = gen.seed;
+  genParams.batch_count = gen.batchCount;
+  genParams.strength = gen.strength;
+  genParams.clip_skip = gen.clipSkip;
+
+  genParams.sample_params.sample_method = gen.sampleMethod;
+  genParams.sample_params.scheduler = gen.scheduler;
+  genParams.sample_params.sample_steps = gen.steps;
+  genParams.sample_params.guidance.txt_cfg = gen.cfgScale;
+  genParams.sample_params.guidance.distilled_guidance = gen.guidance;
+  genParams.sample_params.guidance.img_cfg =
+      gen.imgCfgScale < 0.0f ? gen.cfgScale : gen.imgCfgScale;
+  genParams.sample_params.eta = gen.eta;
+  genParams.sample_params.flow_shift = config_.flowShift;
+
+  // ── VAE tiling ────────────────────────────────────────────────────────────
+  genParams.vae_tiling_params.enabled = gen.vaeTiling;
+  genParams.vae_tiling_params.tile_size_x = gen.vaeTileSizeX;
+  genParams.vae_tiling_params.tile_size_y = gen.vaeTileSizeY;
+  genParams.vae_tiling_params.target_overlap = gen.vaeTileOverlap;
+
+  // ── Step-caching ──────────────────────────────────────────────────────────
+  sd_cache_params_init(&genParams.cache);
+  genParams.cache.mode = gen.cacheMode;
+  if (gen.cacheThreshold > 0.0f)
+    genParams.cache.reuse_threshold = gen.cacheThreshold;
+  if (gen.cacheStart > 0.0f)
+    genParams.cache.start_percent = gen.cacheStart;
+  if (gen.cacheEnd > 0.0f)
+    genParams.cache.end_percent = gen.cacheEnd;
+
+  // ── img2img init image (bytes passed as JSON array) ───────────────────────
+  sd_image_t initImg{};
+  std::vector<uint8_t> initPng;
+
+  if (gen.mode == "img2img") {
+    if (auto it = v.get<picojson::object>().find("init_image_bytes");
+        it != v.get<picojson::object>().end() &&
+        it->second.is<picojson::array>()) {
+      const auto& arr = it->second.get<picojson::array>();
+      initPng.reserve(arr.size());
+      for (const auto& el : arr)
+        initPng.push_back(static_cast<uint8_t>(el.get<double>()));
+    }
+    if (!initPng.empty())
+      initImg = decodePng(initPng);
+  }
+  genParams.init_image = initImg;
+
+  // ── Generate ──────────────────────────────────────────────────────────────
+  const auto t0 = std::chrono::steady_clock::now();
+
+  sd_image_t* rawImages = generate_image(sdCtx_.get(), &genParams);
+  free(initImg.data); // no-op when no init image was decoded
+  const bool wasCancelled = cancelRequested_.load();
+
+  // generate_image() may return nullptr. Treat that as a failed job instead
+  // of dereferencing a null array; a cancelled job throws further below.
+  if (!rawImages && !wasCancelled) {
+    throw StatusError(
+        general_error::InternalError,
+        "SdModel::process() failed — generate_image() returned no images");
+  }
+  SdImageBatch results(rawImages, rawImages ? gen.batchCount : 0);
+
+  int outputCount = 0;
+  for (int i = 0; i < results.count(); ++i) {
+    if (results[i].data && !wasCancelled) {
+      auto png = encodeToPng(results[i]);
+      if (!png.empty() && job.outputCallback) {
+        job.outputCallback(png);
+        ++outputCount;
+      }
+    }
+    results.release(i); // free pixels now; the destructor handles the rest
+  }
+
+  // A cancelled job is reported as an exception so JobRunner emits
+  // queueException rather than queueResult + queueJobEnded. Unlike the LLM
+  // addon (partial text is useful), diffusion yields no partial images, so a
+  // "successful" completion with zero outputs would be misleading.
+  if (wasCancelled) {
+    throw std::runtime_error("Job cancelled");
+  }
+
+  const auto t1 = std::chrono::steady_clock::now();
+
+  // ── Accumulate cumulative counters ─────────────────────────────────────────
+  const int64_t genMsI = static_cast<int64_t>(
+      std::chrono::duration<double, std::milli>(t1 - t0).count());
+  stats_.totalGenerationMs += genMsI;
+  stats_.totalWallMs += genMsI;
+  stats_.totalSteps += gen.steps;
+  stats_.totalGenerations++;
+  stats_.totalImages += outputCount;
+  stats_.totalPixels +=
+      static_cast<int64_t>(gen.width) * gen.height * outputCount;
+
+  // ── Build stats for runtimeStats() ─────────────────────────────────────────
+  // Stats are stored and emitted via queueJobEnded() → runtimeStats().
+  // process() returns std::any{} (empty) so images delivered via
+  // outputCallback are not duplicated as a queueResult event.
+  // Only primitive (non-derivable) values are reported. Callers can compute
+  // rates such as stepsPerSecond = totalSteps / (totalWallMs / 1000.0).
+  lastStats_.clear();
+  lastStats_.emplace_back("modelLoadMs", stats_.modelLoadMs);
+  lastStats_.emplace_back("generationMs", genMsI);
+  lastStats_.emplace_back("totalGenerationMs", stats_.totalGenerationMs);
+  lastStats_.emplace_back("totalWallMs", stats_.totalWallMs);
+
+  lastStats_.emplace_back("totalSteps", stats_.totalSteps);
+  lastStats_.emplace_back("totalGenerations", stats_.totalGenerations);
+  lastStats_.emplace_back("totalImages", stats_.totalImages);
+  lastStats_.emplace_back("totalPixels", stats_.totalPixels);
+
+  lastStats_.emplace_back("width", static_cast<int64_t>(gen.width));
+  lastStats_.emplace_back("height", static_cast<int64_t>(gen.height));
+  lastStats_.emplace_back("seed", gen.seed);
+
+  return std::any{};
+}
+
+// ---------------------------------------------------------------------------
+// cancel / runtimeStats
+// ---------------------------------------------------------------------------
+// cancel(): raises the flag that isCancelRequested() / sdAbortCallback() read.
+void SdModel::cancel() const { cancelRequested_.store(true); }
+// runtimeStats(): returns a copy of lastStats_, which process() refills.
+qvac_lib_inference_addon_cpp::RuntimeStats SdModel::runtimeStats() const {
+  return lastStats_;
+}
+
+// ---------------------------------------------------------------------------
+// PNG encode / decode (stb_image / stb_image_write)
+// ---------------------------------------------------------------------------
+
+std::vector<uint8_t> SdModel::encodeToPng(const sd_image_t& img) {
+  // PNG-encodes `img` in memory: stb_image_write passes the encoded bytes to
+  // the sink below, which appends them to the vector that is returned.
+  std::vector<uint8_t> encoded;
+  const auto appendChunk = [](void* sink, void* chunk, int len) {
+    const auto* first = static_cast<const uint8_t*>(chunk);
+    auto* bytes = static_cast<std::vector<uint8_t>*>(sink);
+    bytes->insert(bytes->end(), first, first + len);
+  };
+
+  const int width = static_cast<int>(img.width);
+  const int height = static_cast<int>(img.height);
+  const int channels = static_cast<int>(img.channel);
+  const int strideBytes = static_cast<int>(img.width * img.channel);
+  stbi_write_png_to_func(
+      appendChunk, &encoded, width, height, channels, img.data, strideBytes);
+
+  return encoded;
+}
+
+sd_image_t SdModel::decodePng(const std::vector<uint8_t>& pngBytes) {
+  // Decodes to 3-channel pixels; empty input or a failed decode yields {}.
+  int wide = 0, high = 0, comp = 0;
+  const int len = static_cast<int>(pngBytes.size());
+  uint8_t* pixels = pngBytes.empty() ? nullptr
+      : stbi_load_from_memory(pngBytes.data(), len, &wide, &high, &comp, 3);
+  if (!pixels)
+    return sd_image_t{};
+  return sd_image_t{
+      static_cast<uint32_t>(wide), static_cast<uint32_t>(high), 3, pixels};
+}
+
+// ---------------------------------------------------------------------------
+// Log callback
+// ---------------------------------------------------------------------------
+
+void SdModel::sdLogCallback(
+    sd_log_level_t level, const char* text, void* /*userData*/) {
+  // Maps the stable-diffusion log level onto the addon logger's priority
+  // (anything unrecognised is logged as ERROR) and forwards the message;
+  // a null `text` is logged as an empty string.
+  namespace lg = qvac_lib_inference_addon_cpp::logger;
+
+  const lg::Priority priority = [level] {
+    switch (level) {
+    case SD_LOG_DEBUG:
+      return lg::Priority::DEBUG;
+    case SD_LOG_INFO:
+      return lg::Priority::INFO;
+    case SD_LOG_WARN:
+      return lg::Priority::WARNING;
+    case SD_LOG_ERROR:
+    default:
+      return lg::Priority::ERROR;
+    }
+  }();
+
+  QLOG_IF(priority, std::string(text != nullptr ? text : ""));
+}
diff --git a/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.hpp b/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.hpp
new file mode 100644
index 0000000000..3ccbfae347
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/model-interface/SdModel.hpp
@@ -0,0 +1,130 @@
+#pragma once
+
+#include <any>
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <qvac-lib-inference-addon-cpp/ModelInterfaces.hpp>
+#include <qvac-lib-inference-addon-cpp/RuntimeStats.hpp>
+#include <stable-diffusion.h>
+
+#include "handlers/SdCtxHandlers.hpp"
+#include "handlers/SdGenHandlers.hpp"
+
+/**
+ * Core stable-diffusion.cpp model wrapper.
+ *
+ * Supported model families:
+ *   SD1.x  — all-in-one .ckpt / .safetensors via modelPath
+ *   SD2.x  — same as SD1; set prediction="v" in context config
+ *   SDXL   — all-in-one + optional split CLIP-G; set
+ *            force_sdxl_vae_conv_scale if needed
+ *   FLUX.2 [klein] — split: diffusionModelPath + llmPath (Qwen3) + vaePath
+ *
+ * Video generation (txt2vid) is intentionally unsupported.
+ *
+ * Lifecycle:
+ *   1. Construct  — stores SdCtxConfig, allocates nothing
+ *   2. load()     — calls new_sd_ctx(); weights are read from disk here
+ *   3. process()  — runs txt2img / img2img via generate_image()
+ *   4. Destroy    — destructor calls free_sd_ctx() and releases all GPU/CPU
+ *                   memory; to unload simply let the object go out of scope
+ */
+class SdModel : public qvac_lib_inference_addon_cpp::model::IModel,
+                public qvac_lib_inference_addon_cpp::model::IModelCancel {
+public:
+  SdModel(const SdModel&) = delete;
+  SdModel& operator=(const SdModel&) = delete;
+  SdModel(SdModel&&) = delete;
+  SdModel& operator=(SdModel&&) = delete;
+
+  /**
+   * Stores config. Does NOT load weights — call load() for that.
+   * @param config  Fully resolved load-time configuration (paths + context
+   * options).
+   */
+  explicit SdModel(qvac_lib_inference_addon_sd::SdCtxConfig config);
+
+  /**
+   * Releases the sd_ctx and all associated GPU/CPU memory.
+   */
+  ~SdModel() override;
+
+  [[nodiscard]] std::string getName() const final { return "SdModel"; }
+
+  // ── Lifecycle ──────────────────────────────────────────────────────────────
+
+  /**
+   * Load model weights into memory.
+   * Builds sd_ctx_params_t from the stored SdCtxConfig and calls new_sd_ctx().
+   * Throws qvac_errors::StatusError on failure.
+   * No-op if already loaded.
+   */
+  void load();
+
+  /**
+   * Returns true if weights are currently loaded (sd_ctx is live).
+   */
+  [[nodiscard]] bool isLoaded() const noexcept { return sdCtx_ != nullptr; }
+
+  // ── IModel ─────────────────────────────────────────────────────────────────
+
+  /**
+   * Run a generation job.
+   * Input must be a SdModel::GenerationJob wrapped in std::any.
+   * Throws if the model is not loaded.
+   */
+  std::any process(const std::any& input) final;
+
+  // ── IModelCancel ───────────────────────────────────────────────────────────
+
+  void cancel() const final;
+
+  /** True if cancel() has been called since the last job started. */
+  [[nodiscard]] bool isCancelRequested() const noexcept {
+    return cancelRequested_.load();
+  }
+
+  [[nodiscard]] qvac_lib_inference_addon_cpp::RuntimeStats
+  runtimeStats() const final;
+
+  // ── Log callback ───────────────────────────────────────────────────────────
+
+  static void
+  sdLogCallback(sd_log_level_t level, const char* text, void* userData);
+
+  // ── Generation job input type ─────────────────────────────────────────────
+
+  struct GenerationJob {
+    std::string paramsJson;
+    /** Called each diffusion step: {"step":N,"total":M,"elapsed_ms":T} */
+    std::function<void(const std::string&)> progressCallback;
+    /** Called once per output image with PNG-encoded bytes */
+    std::function<void(const std::vector<uint8_t>&)> outputCallback;
+  };
+
+private:
+  static std::vector<uint8_t> encodeToPng(const sd_image_t& img);
+  static sd_image_t decodePng(const std::vector<uint8_t>& pngBytes);
+
+  const qvac_lib_inference_addon_sd::SdCtxConfig config_;
+  // Null until load() succeeds; the deleter is free_sd_ctx.
+  std::unique_ptr<sd_ctx_t, decltype(&free_sd_ctx)> sdCtx_;
+  mutable std::atomic<bool> cancelRequested_{false};
+  mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_{};
+
+  // ── Cumulative stats ──────────────────────────────────────────────────────
+  struct CumulativeStats {
+    int64_t modelLoadMs{0};
+    int64_t totalGenerationMs{0};
+    int64_t totalWallMs{0};
+    int64_t totalSteps{0};
+    int64_t totalGenerations{0};
+    int64_t totalImages{0};
+    int64_t totalPixels{0};
+  };
+  CumulativeStats stats_{};
+};
diff --git a/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.cpp b/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.cpp
new file mode 100644
index 0000000000..6b306b557c
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.cpp
@@ -0,0 +1,178 @@
+#include "BackendSelection.hpp"
+
+#include <algorithm>
+#include <cctype>
+#include <string>
+
+#include <ggml-backend.h>
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+
+#include "LoggingMacros.hpp"
+
+using namespace qvac_errors;
+
+namespace {
+
+// Extract the Adreno model number from a device description string.
+// Returns 0 if the device is not an Adreno GPU or no usable number follows.
+// Example: "Adreno (TM) 830" -> 830, "Adreno (TM) 740" -> 740
+int parseAdrenoModel(const std::string& description) {
+  std::string lower = description;
+  std::transform(
+      lower.begin(), lower.end(), lower.begin(), [](unsigned char c) {
+        return std::tolower(c);
+      });
+
+  const auto pos = lower.find("adreno");
+  if (pos == std::string::npos) {
+    return 0;
+  }
+  // First digit after the marker ("adreno" is 6 characters long).
+  const auto digit = lower.find_first_of("0123456789", pos + 6);
+  try {
+    // std::stoi throws std::out_of_range when the digit run overflows int.
+    return digit == std::string::npos ? 0 : std::stoi(lower.substr(digit));
+  } catch (...) {
+    return 0;
+  }
+}
+
+// Lower-cases a by-value copy of `s` one byte at a time and returns it.
+std::string toLowerCopy(std::string s) {
+  for (char& ch : s)
+    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
+  return s;
+}
+
+} // namespace
+
+namespace sd_backend_selection {
+
+// Maps the optional "device" config entry onto a BackendDevice.
+BackendDevice preferredDeviceFromMap(
+    const std::unordered_map<std::string, std::string>& configMap) {
+  const auto entry = configMap.find("device");
+  if (entry == configMap.end()) {
+    // No explicit request: GPU is the default preference.
+    return BackendDevice::GPU;
+  }
+
+  // Matching is exact: only the lowercase strings "gpu" and "cpu" pass.
+  const std::string& requested = entry->second;
+  if (requested != "gpu" && requested != "cpu") {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "Invalid device value '" + requested + "'. Must be 'gpu' or 'cpu'.");
+  }
+
+  return requested == "gpu" ? BackendDevice::GPU : BackendDevice::CPU;
+}
+
+// Reads the optional "threads" entry as an int; -1 means "auto".
+int threadsFromMap(
+    const std::unordered_map<std::string, std::string>& configMap) {
+  constexpr int kAutoThreads = -1;
+  const auto entry = configMap.find("threads");
+  try {
+    // A missing key and an unparsable value both fall back to auto.
+    return entry != configMap.end() ? std::stoi(entry->second) : kAutoThreads;
+  } catch (...) {
+    return kAutoThreads;
+  }
+}
+
+// Resolves the backend to use; a CPU preference short-circuits all probing.
+BackendDevice resolveBackendForDevice(BackendDevice preferred) {
+  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
+  // Guard against null ggml strings: std::string(nullptr) is undefined.
+  const auto orEmpty = [](const char* s) { return std::string(s ? s : ""); };
+
+  if (preferred == BackendDevice::CPU) {
+    QLOG_IF(Priority::INFO, "Backend selection: user requested CPU");
+    return BackendDevice::CPU;
+  }
+
+  const size_t nDevices = ggml_backend_dev_count();
+  QLOG_IF(
+      Priority::INFO,
+      "Backend selection: " + std::to_string(nDevices) + " device(s)");
+
+  for (size_t i = 0; i < nDevices; ++i) {
+    ggml_backend_dev_t dev = ggml_backend_dev_get(i);
+    enum ggml_backend_dev_type devType = ggml_backend_dev_type(dev);
+    if (devType != GGML_BACKEND_DEVICE_TYPE_GPU &&
+        devType != GGML_BACKEND_DEVICE_TYPE_IGPU) {
+      continue;
+    }
+
+    const std::string desc = orEmpty(ggml_backend_dev_description(dev));
+    const std::string name = orEmpty(ggml_backend_dev_name(dev));
+    QLOG_IF(
+        Priority::INFO,
+        "Backend selection: GPU device '" + desc + "' (backend: " + name + ")");
+    const int model = parseAdrenoModel(desc);
+    if (model > 0) {
+      QLOG_IF(
+          Priority::INFO,
+          "Backend selection: Adreno model " + std::to_string(model));
+    }
+    if (model >= 800) {
+      QLOG_IF(Priority::INFO, "Backend selection: Adreno 800+ -> GPU (OpenCL)");
+      return BackendDevice::GPU;
+    }
+    if (model >= 600) {
+      QLOG_IF(Priority::INFO, "Backend selection: Adreno 600/700 -> CPU");
+      return BackendDevice::CPU;
+    }
+  }
+
+  QLOG_IF(Priority::INFO, "Backend selection: non-Adreno -> GPU (Vulkan)");
+  return BackendDevice::GPU;
+}
+
+// True only if an Adreno 800+ GPU and an OpenCL GPU backend are both listed.
+bool shouldPreferOpenClForAdreno(BackendDevice preferred) {
+  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
+
+  if (preferred == BackendDevice::CPU) {
+    return false;
+  }
+
+  // The two flags may be satisfied by different device entries.
+  bool sawAdreno800Plus = false;
+  bool sawOpenClGpu = false;
+
+  const size_t deviceCount = ggml_backend_dev_count();
+  for (size_t idx = 0; idx < deviceCount; ++idx) {
+    ggml_backend_dev_t device = ggml_backend_dev_get(idx);
+    const enum ggml_backend_dev_type kind = ggml_backend_dev_type(device);
+    const bool isGpu = kind == GGML_BACKEND_DEVICE_TYPE_GPU ||
+                       kind == GGML_BACKEND_DEVICE_TYPE_IGPU;
+    if (!isGpu) {
+      continue;
+    }
+
+    // Null strings from ggml are treated as empty.
+    const char* rawDesc = ggml_backend_dev_description(device);
+    const char* rawName = ggml_backend_dev_name(device);
+    const std::string description = rawDesc ? rawDesc : "";
+    const std::string backend = rawName ? rawName : "";
+    if (parseAdrenoModel(description) >= 800) {
+      sawAdreno800Plus = true;
+    }
+    if (toLowerCopy(backend).find("opencl") != std::string::npos) {
+      sawOpenClGpu = true;
+    }
+  }
+
+  if (!(sawAdreno800Plus && sawOpenClGpu)) {
+    return false;
+  }
+  QLOG_IF(
+      Priority::INFO,
+      "Backend selection: Adreno 800+ with OpenCL backend available -> "
+      "prefer OpenCL");
+  return true;
+}
+
+} // namespace sd_backend_selection
diff --git a/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.hpp b/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.hpp
new file mode 100644
index 0000000000..22d64813a9
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/utils/BackendSelection.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+
+namespace sd_backend_selection {
+
+enum class BackendDevice : uint8_t { CPU, GPU };
+
+/**
+ * Parse the "device" key ("gpu" or "cpu") from a config map.
+ * Returns GPU if the key is absent. Throws StatusError on any other value.
+ */
+BackendDevice preferredDeviceFromMap(
+    const std::unordered_map<std::string, std::string>& configMap);
+
+/**
+ * Read the "threads" key (CPU thread count) from a config map.
+ * Returns -1 (auto) if the key is absent or std::stoi cannot parse its value.
+ */
+int threadsFromMap(
+    const std::unordered_map<std::string, std::string>& configMap);
+
+/**
+ * Resolve the effective backend for stable-diffusion.cpp by inspecting
+ * available ggml devices at runtime.
+ *
+ * Priority:
+ *   Adreno 800+  -> GPU (OpenCL will be selected by init_backend)
+ *   Adreno 600/700 -> CPU (OpenCL works but is slow; force CPU)
+ *   Everything else -> GPU (Vulkan or other backend via init_backend)
+ *
+ * Returns the resolved BackendDevice.
+ */
+BackendDevice resolveBackendForDevice(BackendDevice preferred);
+
+/**
+ * Returns true when runtime device probing indicates that OpenCL should be
+ * preferred for Adreno 800+ GPUs.
+ *
+ * This only applies when preferred is GPU. CPU preference always returns false.
+ */
+bool shouldPreferOpenClForAdreno(BackendDevice preferred);
+
+} // namespace sd_backend_selection
diff --git a/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.cpp b/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.cpp
new file mode 100644
index 0000000000..2aebd97e8e
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.cpp
@@ -0,0 +1,42 @@
+#include "LoggingMacros.hpp"
+
+using namespace qvac_lib_inference_addon_cpp::logger;
+
+namespace qvac_lib_inference_addon_sd {
+namespace logging {
+
+// Default to ERROR to prevent log spam before verbosity is configured
+Priority g_verbosityLevel = Priority::ERROR;
+
+// Applies and then removes the "verbosity" entry of configMap, if present.
+void setVerbosityLevel(
+    std::unordered_map<std::string, std::string>& configMap) {
+  const auto entry = configMap.find("verbosity");
+  if (entry == configMap.end()) {
+    return; // key absent: the current level is kept
+  }
+
+  Priority level = Priority::ERROR;
+  try {
+    const int requested = std::stoi(entry->second);
+    if (requested == 0) {
+      level = Priority::ERROR;
+    } else if (requested == 1) {
+      level = Priority::WARNING;
+    } else if (requested == 2) {
+      level = Priority::INFO;
+    } else {
+      // 3 and every other integer select DEBUG.
+      level = Priority::DEBUG;
+    }
+  } catch (...) {
+    level = Priority::ERROR; // std::stoi could not parse the value
+  }
+  g_verbosityLevel = level;
+
+  // Drop the consumed key from the caller's map.
+  configMap.erase(entry);
+}
+
+} // namespace logging
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.hpp b/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.hpp
new file mode 100644
index 0000000000..544d22bbe2
--- /dev/null
+++ b/packages/lib-infer-diffusion/addon/src/utils/LoggingMacros.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+#include "qvac-lib-inference-addon-cpp/Logger.hpp"
+
+namespace qvac_lib_inference_addon_sd {
+namespace logging {
+
+// Global verbosity level shared across all SD model instances
+extern qvac_lib_inference_addon_cpp::logger::Priority g_verbosityLevel;
+
+/**
+ * Set the global log level from the "verbosity" key, then erase that key:
+ * 0=error, 1=warn, 2=info, other ints=debug, unparsable=error; absent=no-op.
+ */
+void setVerbosityLevel(std::unordered_map<std::string, std::string>& configMap);
+
+} // namespace logging
+} // namespace qvac_lib_inference_addon_sd
+
+// Calls QLOG only if priority <= global level; `priority` is expanded twice
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define QLOG_IF(priority, message)                                             \
+  do {                                                                         \
+    if (static_cast<int>(priority) <=                                          \
+        static_cast<int>(                                                      \
+            qvac_lib_inference_addon_sd::logging::g_verbosityLevel)) {         \
+      QLOG(priority, message);                                                 \
+    }                                                                          \
+  } while (0)
diff --git a/packages/lib-infer-diffusion/addonLogging.d.ts b/packages/lib-infer-diffusion/addonLogging.d.ts
new file mode 100644
index 0000000000..bd687d60bc
--- /dev/null
+++ b/packages/lib-infer-diffusion/addonLogging.d.ts
@@ -0,0 +1,7 @@
+export interface AddonLogging {
+  setLogger(callback: (priority: number, message: string) => void): void
+  releaseLogger(): void
+}
+
+declare const addonLogging: AddonLogging
+export default addonLogging
diff --git a/packages/lib-infer-diffusion/addonLogging.js b/packages/lib-infer-diffusion/addonLogging.js
new file mode 100644
index 0000000000..479ecdf3da
--- /dev/null
+++ b/packages/lib-infer-diffusion/addonLogging.js
@@ -0,0 +1,6 @@
+const binding = require('./binding')
+
+module.exports = {
+  setLogger: binding.setLogger,
+  releaseLogger: binding.releaseLogger
+}
diff --git a/packages/lib-infer-diffusion/binding.js b/packages/lib-infer-diffusion/binding.js
new file mode 100644
index 0000000000..cea46308c0
--- /dev/null
+++ b/packages/lib-infer-diffusion/binding.js
@@ -0,0 +1 @@
+module.exports = require.addon()
diff --git a/packages/lib-infer-diffusion/build.md b/packages/lib-infer-diffusion/build.md
new file mode 100644
index 0000000000..01ec320765
--- /dev/null
+++ b/packages/lib-infer-diffusion/build.md
@@ -0,0 +1,173 @@
+
+# Building from Source
+
+If you want to build the addon from source instead of using pre-built packages, follow these steps:
+
+## Prerequisites for Building
+
+1. **Install Bare** (version >= 1.24.0):
+   ```bash
+   npm install -g bare
+   ```
+
+2. **Install bare-make**:
+   ```bash
+   npm install -g bare-make
+   ```
+
+3. **Install vcpkg and set VCPKG_ROOT**:
+   
+   This project uses vcpkg for dependency management. You need to install vcpkg and set the `VCPKG_ROOT` environment variable.
+   Cloning the `2025.12.12` tag (as shown below) pins vcpkg to the exact version this project uses:
+   
+   - **macOS/Linux**:
+     ```bash
+     # Clone vcpkg (use a location outside the project directory)
+     cd ~
+     git clone --branch 2025.12.12 --single-branch https://github.com/microsoft/vcpkg.git
+     cd vcpkg
+     
+     # Bootstrap vcpkg
+     ./bootstrap-vcpkg.sh -disableMetrics
+     
+     # Set VCPKG_ROOT environment variable (add to your ~/.zshrc or ~/.bashrc for persistence)
+     export VCPKG_ROOT=$(pwd)
+     ```
+   
+   - **Windows**:
+     ```powershell
+     # Clone vcpkg (use a location outside the project directory)
+     cd C:\
+     git clone --branch 2025.12.12 --single-branch https://github.com/microsoft/vcpkg.git
+     cd vcpkg
+     
+     # Bootstrap vcpkg
+     .\bootstrap-vcpkg.bat
+     
+     # Set VCPKG_ROOT environment variable (for current session)
+     $env:VCPKG_ROOT = (Get-Location).Path
+     
+     # To make it persistent, add it to System Environment Variables or your PowerShell profile
+     ```
+   
+   You can verify it's set by running:
+     - macOS/Linux: `echo $VCPKG_ROOT`
+     - Windows: `echo $env:VCPKG_ROOT`
+
+4. **Platform-specific requirements**:
+   - **macOS**:
+     - Xcode Command Line Tools (`xcode-select --install`)
+     - Apple clang (the upstream LLVM toolchain, e.g. from Homebrew, is not supported at the moment)
+   - **Linux**: LLVM/Clang 19 (with libc++), CMake 3.25+, Vulkan SDK
+     ```bash
+     # Install LLVM 19
+     wget -q https://apt.llvm.org/llvm.sh && chmod +x llvm.sh && sudo ./llvm.sh 19 all
+
+     # Install Vulkan SDK
+     sudo apt install -y xz-utils
+     wget -q -O /tmp/vulkansdk.tar.xz https://sdk.lunarg.com/sdk/download/latest/linux/vulkan_sdk.tar.xz
+     mkdir -p ~/vulkan && cd ~/vulkan && tar xf /tmp/vulkansdk.tar.xz --strip-components=1
+     export VULKAN_SDK=~/vulkan/x86_64  # or ~/vulkan/aarch64 for ARM64
+
+     # Required dev packages
+     sudo apt-get install libxi-dev libxtst-dev libxrandr-dev
+     ```
+   - **Windows**:
+     - Install Visual Studio 2022 with C++ tools, Clang and LLVM tools
+     - Install LLVM (e.g. `choco upgrade llvm`)
+     - Install Vulkan SDK:
+       ```powershell
+       # Download and install
+       Invoke-WebRequest -Uri "https://sdk.lunarg.com/sdk/download/latest/windows/vulkan-sdk.exe" -OutFile vulkan-sdk.exe
+       .\vulkan-sdk.exe --root C:\VulkanSDK --accept-licenses --default-answer --confirm-command install
+
+       # Set environment variable
+       $env:VULKAN_SDK = "C:\VulkanSDK"
+       ```
+   - **All platforms**: Git, CMake 3.25+
+
+## Build Steps
+
+1. **Clone the repository**:
+   ```bash
+   git clone https://github.com/tetherto/qvac.git
+   cd qvac/packages/lib-infer-diffusion
+   ```
+
+2. **Install dependencies**:
+   ```bash
+   npm install
+   ```
+
+3. **Build the addon**:
+   ```bash
+   npm run build
+   ```
+
+   This command runs the complete build pipeline:
+   - `bare-make generate` - Generates build files and downloads/builds vcpkg dependencies (including `stable-diffusion.cpp` and `ggml`)
+   - `bare-make build` - Compiles the native addon
+   - `bare-make install` - Installs the built addon into `prebuilds/`
+
+> **First build note:** The vcpkg step clones and compiles `stable-diffusion.cpp` and `ggml` from source, which can take **5–15 minutes** depending on your machine and internet connection.
+
+## Advanced Build Options
+
+For more control over the build process, you can run the commands individually:
+
+```bash
+# Generate build files (with optional flags)
+bare-make generate
+
+# Build the addon
+bare-make build
+
+# Install the built addon
+bare-make install
+```
+
+### Build with unit tests
+
+```bash
+# Build and run C++ unit tests
+npm run test:cpp
+```
+
+## Building for Different Platforms
+
+Native builds (building for the same platform you're running on) work out of the box.
+
+Cross-compilation:
+
+```bash
+# Example: Build for Android
+bare-make generate --platform android --arch arm64 -D ANDROID_STL=c++_shared
+bare-make build
+bare-make install
+
+# Example: Build for iOS
+bare-make generate --platform ios --arch arm64
+bare-make build
+bare-make install
+```
+
+**Important:** When switching between different platforms or architectures, you should clean the build directory first to avoid configuration conflicts:
+
+```bash
+# Clean build directory before switching platforms
+rm -rf build
+bare-make generate --platform <new-platform> --arch <new-arch>
+bare-make build
+bare-make install
+```
+
+**Supported platforms:** `linux`, `win32`, `darwin`, `android`, `ios`
+**Supported architectures:** `x64`, `arm64`
+
+## Troubleshooting Build Issues
+
+- **VCPKG_ROOT env var must be set**: Make sure you've installed vcpkg and set the `VCPKG_ROOT` environment variable to point to your vcpkg installation directory. See step 3, "Install vcpkg and set VCPKG_ROOT", under "Prerequisites for Building" above.
+- **CMake cannot find cmake-bare**: Make sure you installed `bare` (not `bare-runtime`). The `bare` package includes the necessary CMake configuration files.
+- **Android cross-compilation fails with "Could NOT find Vulkan (missing: glslc)"**: Install Vulkan shader compiler tools with `brew install shaderc` on macOS.
+- **Build is targeting wrong platform**: If you're switching between platforms (e.g., from macOS to iOS) and the build is still targeting the previous platform, clean the build directory first: `rm -rf build` before running `bare-make generate` again.
+- **macOS JS code silently crashes**: `bare-make` currently prefers the Homebrew LLVM toolchain when it is installed, which can produce corrupted `prebuilds` binaries that segfault early in JS usage. If you hit this, uninstall or temporarily move your Homebrew LLVM during the build.
diff --git a/packages/lib-infer-diffusion/docs/architecture.md b/packages/lib-infer-diffusion/docs/architecture.md
new file mode 100644
index 0000000000..21bb8b1de6
--- /dev/null
+++ b/packages/lib-infer-diffusion/docs/architecture.md
@@ -0,0 +1,767 @@
+# Architecture Documentation
+
+**Package:** `@qvac/img-stable-diffusion-cpp` v0.1.0  
+**Stack:** JavaScript, C++20, stable-diffusion.cpp, Bare Runtime, CMake, vcpkg  
+**License:** Apache-2.0
+
+---
+
+## Table of Contents
+
+### Overview
+- [Purpose](#purpose)
+- [Key Features](#key-features)
+- [Target Platforms](#target-platforms)
+
+### Core Architecture
+- [Package Context](#package-context)
+- [Public API](#public-api)
+- [Internal Architecture](#internal-architecture)
+- [Core Components](#core-components)
+- [Bare Runtime Integration](#bare-runtime-integration)
+
+### Architecture Decisions
+- [Decision 1: stable-diffusion.cpp as Inference Backend](#decision-1-stable-diffusioncpp-as-inference-backend)
+- [Decision 2: Bare Runtime over Node.js](#decision-2-bare-runtime-over-nodejs)
+- [Decision 3: Disk-Local Model Files](#decision-3-disk-local-model-files)
+- [Decision 4: Direct File Path Loading](#decision-4-direct-file-path-loading)
+- [Decision 5: Generation Parameters Format](#decision-5-generation-parameters-format-json-serialization)
+- [Decision 6: Exclusive Run Queue](#decision-6-exclusive-run-queue-indexjs)
+- [Decision 7: TypeScript Definitions](#decision-7-typescript-definitions)
+
+### Technical Debt
+- [Limited Error Context](#1-limited-error-context)
+
+---
+
+# Overview
+
+## Purpose
+
+`@qvac/img-stable-diffusion-cpp` is a cross-platform npm package providing diffusion model inference for Bare runtime applications. It wraps stable-diffusion.cpp in a JavaScript-friendly API, enabling local image and video generation on desktop and mobile with CPU/GPU acceleration.
+
+**Core value:**
+- High-level JavaScript API for diffusion model inference
+- Progress callback during generation steps
+- Text-to-image and image-to-image generation via single `run()` API
+- Disk-local model files (no download/streaming layer)
+
+## Key Features
+
+- **Cross-platform**: macOS, Linux, Windows, iOS, Android
+- **Disk-local models**: Files must be present on disk at `diskPath`
+- **Progress tracking**: Step-by-step generation progress callbacks
+- **GPU acceleration**: Metal, Vulkan, OpenCL
+- **Quantized models**: GGUF, safetensors, checkpoint formats
+- **Diffusion models**: SD1.x, SD2.x, SDXL, SD3, FLUX.2 [klein]
+- **Generation modes**: txt2img, img2img (auto-detected via `init_image` parameter)
+
+## Target Platforms
+
+| Platform | Architecture | Min Version | Status | GPU Support |
+|----------|-------------|-------------|--------|-------------|
+| macOS | arm64, x64 | 14.0+ | ✅ Tier 1 | Metal |
+| iOS | arm64 | 17.0+ | ✅ Tier 1 | Metal |
+| Linux | arm64, x64 | Ubuntu-22+ | ✅ Tier 1 | Vulkan |
+| Android | arm64 | 12+ | ✅ Tier 1 | Vulkan, OpenCL |
+| Windows | x64 | 10+ | ✅ Tier 1 | Vulkan |
+
+**Dependencies:**
+- qvac-lib-inference-addon-cpp (≥1.1.2): C++ addon framework (single-job runner, runJob/activate/cancel/destroyInstance)
+- stable-diffusion.cpp: Diffusion inference engine
+- Bare Runtime (≥1.24.0): JavaScript runtime
+- On Ubuntu 22, g++-13 must be installed
+
+---
+
+# Core Architecture
+
+## Package Context
+
+### Ecosystem Position
+
+```mermaid
+graph TB
+    subgraph "Application Layer"
+        APP[QVAC Applications]
+    end
+    
+    subgraph "Inference Addons"
+        IMG[img-stable-diffusion-cpp<br/>Image/Video Gen]
+        LLM[llm-llamacpp<br/>LLMs]
+        EMBED[embed-llamacpp<br/>Embeddings]
+        WHISPER[whispercpp<br/>STT]
+    end
+    
+    subgraph "core libs"
+        BASE["@qvac/infer-base"]
+    end
+    
+    subgraph "Native Framework"
+        ADDON[addon-cpp]
+    end
+    
+    subgraph "Backend"
+        BARE[Bare Runtime]
+        SDCPP[stable-diffusion.cpp]
+    end
+    
+    APP --> IMG
+    IMG --> BASE
+    IMG --> ADDON
+    ADDON --> BARE
+    ADDON --> SDCPP
+    
+    style IMG fill:#e1f5ff,stroke:#0066cc,stroke-width:3px
+```
+
+<details>
+<summary>📊 LLM-Friendly: Package Relationships</summary>
+
+**Dependency Table:**
+
+| Package | Type | Version | Purpose |
+|---------|------|---------|---------|
+| @qvac/infer-base | Framework | ^0.2.0 | Base classes (BaseInference, QvacResponse) |
+| qvac-lib-inference-addon-cpp | Native | ≥1.1.1 | C++ addon framework (single-job runner) |
+| stable-diffusion.cpp | Native | latest | Diffusion inference engine |
+| Bare Runtime | Runtime | ≥1.24.0 | JavaScript execution |
+
+**Integration Points:**
+
+| From | To | Mechanism | Data Format |
+|------|-----|-----------|-------------|
+| JavaScript | ImgStableDiffusion | Constructor | args, config objects |
+| ImgStableDiffusion | BaseInference | Inheritance | Template method pattern |
+| ImgStableDiffusion | SdInterface | Composition | Method calls |
+| SdInterface | C++ Addon | require.addon() | Native binding |
+
+</details>
+
+---
+
+## Public API
+
+### Main Class: ImgStableDiffusion
+
+```mermaid
+classDiagram
+    class ImgStableDiffusion {
+        +constructor(args, config)
+        +load() Promise~void~
+        +run(params: GenerationParams) Promise~QvacResponse~
+        +cancel() Promise~void~
+        +unload() Promise~void~
+    }
+
+    class BaseInference {
+        <<abstract>>
+        +load() Promise~void~
+        +run() Promise~QvacResponse~
+        +unload() Promise~void~
+        #_runInternal() Promise~QvacResponse~
+        #_withExclusiveRun(fn) Promise~any~
+    }
+
+    class QvacResponse {
+        +onUpdate(callback) QvacResponse
+        +await() Promise~void~
+        +cancel() Promise~void~
+    }
+
+    ImgStableDiffusion --|> BaseInference
+    ImgStableDiffusion ..> QvacResponse : creates
+```
+
+<details>
+<summary>📊 LLM-Friendly: Class Responsibilities</summary>
+
+**Component Roles:**
+
+| Class | Responsibility | Lifecycle | Dependencies |
+|-------|----------------|-----------|--------------|
+| ImgStableDiffusion | Orchestrate model lifecycle, manage loading/inference | Created by user, persistent | SdInterface |
+| BaseInference | Define standard inference API (template method pattern) | Abstract base class | None |
+| QvacResponse | Handle generation progress and result | Created per `run()` call | None |
+
+**Key Relationships:**
+
+| From | To | Type | Purpose |
+|------|-----|------|---------|
+| ImgStableDiffusion | BaseInference | Inheritance | Standard QVAC inference API |
+| ImgStableDiffusion | QvacResponse | Creates | Progress/result per generation |
+
+</details>
+
+---
+
+## Internal Architecture
+
+### Architectural Pattern
+
+The package follows a **layered architecture** with clear separation of concerns:
+
+```mermaid
+graph TB
+    subgraph "Layer 1: JavaScript API"
+        APP["Application Code"]
+        IMGCLASS["ImgStableDiffusion<br/>(index.js)"]
+        BASEINF["BaseInference<br/>(@qvac/infer-base)"]
+        RESPONSE["QvacResponse"]
+    end
+    
+    subgraph "Layer 2: Bridge"
+        SDIF["SdInterface<br/>(addon.js)"]
+        BINDING["require.addon<br/>(binding.js)"]
+    end
+    
+    subgraph "Layer 3: C++ Addon"
+        JSINTERFACE["JsInterface<br/>(addon-cpp JsInterface)"]
+        ADDONCPP["AddonCpp / AddonJs<br/>(addon-cpp + addon/AddonJs.hpp)"]
+    end
+    
+    subgraph "Layer 4: Model"
+        SDMODEL["SdModel<br/>(model-interface/SdModel.cpp)"]
+        TXT2IMG["Txt2ImgContext<br/>(model-interface/Txt2ImgContext.cpp)"]
+        IMG2IMG["Img2ImgContext<br/>(model-interface/Img2ImgContext.cpp)"]
+        VIDGEN["VideoGenContext<br/>(model-interface/VideoGenContext.cpp)"]
+    end
+    
+    subgraph "Layer 5: Backend"
+        SDCPP["stable-diffusion.cpp"]
+        GGML["GGML"]
+        GPU["GPU Backends<br/>(Metal/Vulkan/OpenCL)"]
+    end
+    
+    APP --> IMGCLASS
+    IMGCLASS --> BASEINF
+    IMGCLASS --> SDIF
+    IMGCLASS -.-> RESPONSE
+
+    SDIF --> BINDING
+    BINDING --> JSINTERFACE
+
+    JSINTERFACE --> ADDONCPP
+    ADDONCPP --> SDMODEL
+    
+    SDMODEL --> TXT2IMG
+    SDMODEL --> IMG2IMG
+    SDMODEL --> VIDGEN
+    TXT2IMG --> SDCPP
+    IMG2IMG --> SDCPP
+    VIDGEN --> SDCPP
+    
+    SDCPP --> GGML
+    GGML --> GPU
+    
+    style IMGCLASS fill:#e1f5ff
+    style ADDONCPP fill:#ffe1e1
+    style SDMODEL fill:#ffe1e1
+    style SDCPP fill:#e1ffe1
+```
+
+<details>
+<summary>📊 LLM-Friendly: Layer Responsibilities</summary>
+
+**Layer Breakdown:**
+
+| Layer | Components | Responsibility | Language | Why This Layer |
+|-------|------------|----------------|----------|----------------|
+| 1. JavaScript API | ImgStableDiffusion, BaseInference, QvacResponse | High-level API, error handling | JS | Ergonomic API for npm consumers |
+| 2. Bridge | SdInterface, binding.js | JS↔C++ communication | JS wrapper | Lifecycle management, handle safety |
+| 3. C++ Addon | JsInterface, AddonCpp/AddonJs | Single-job runner, threading, callbacks | C++ | Performance, native integration |
+| 4. Model | SdModel, Contexts | Diffusion logic, sampling | C++ | Direct stable-diffusion.cpp integration |
+| 5. Backend | stable-diffusion.cpp, GGML | Tensor ops, GPU kernels | C++ | Optimized inference |
+
+**Data Flow Through Layers:**
+
+| Direction | Path | Data Format | Transform |
+|-----------|------|-------------|-----------|
+| Input → | JS → Bridge → Addon | JSON params | Serialize generation params |
+| Input → | Addon → Model | parsed params | Parse JSON, configure sampler |
+| Input → | Model → SD.cpp | latent tensors | Encode prompt, prepare latents |
+| Output ← | SD.cpp → Model | latent tensors | Denoise step |
+| Output ← | Model → Addon | step progress | Report progress |
+| Output ← | Addon → Bridge | progress/image | Queue output |
+| Output ← | Bridge → JS | Uint8Array (PNG) | Emit via callback |
+
+</details>
+
+---
+
+## Core Components
+
+### JavaScript Components
+
+#### **ImgStableDiffusion (index.js)**
+
+**Responsibility:** Main API class, orchestrates model lifecycle and inference
+
+**Why JavaScript:**
+- High-level API ergonomics for npm consumers
+- Promise/async-await integration
+- Event loop integration for progress callbacks
+- Configuration parsing
+
+#### **SdInterface (addon.js)**
+
+**Responsibility:** JavaScript wrapper around native addon, manages handle lifecycle
+
+**Why JavaScript:**
+- Clean JavaScript API over raw C++ bindings
+- Native handle lifecycle management
+- Type conversion between JS and native
+
+### C++ Components
+
+#### **SdModel (model-interface/SdModel.cpp)**
+
+**Responsibility:** Core diffusion implementation wrapping stable-diffusion.cpp
+
+**Why C++:**
+- Direct integration with stable-diffusion.cpp C API
+- Performance-critical diffusion loop
+- Memory-efficient tensor processing
+- Native GPU backend access
+
+#### **AddonCpp / AddonJs (addon-cpp + addon/AddonJs.hpp)**
+
+**Responsibility:** Addon-cpp framework integration; IMG addon provides createInstance and runJob over JsInterface
+
+**Why C++:**
+- Single-job runner (one job at a time, runJob returns boolean accepted)
+- Dedicated processing thread via addon-cpp JobRunner
+- Thread-safe job submission and cancellation (IModelCancel)
+- Output dispatching via uv_async
+
+**IMG specialization:** createInstance builds SdModel with config; runJob parses generation params (prompt, negative_prompt, cfg_scale, steps, etc.)
+
+#### **BackendSelection (utils/BackendSelection.cpp)**
+
+**Responsibility:** GPU backend selection at runtime
+
+- Selects between CPU, Metal, Vulkan, and OpenCL backends at runtime
+- Metal compiled statically on macOS/iOS
+- Vulkan as cross-platform GPU backend
+- OpenCL for Adreno GPUs on Android
+
+#### **SamplerManager (model-interface/SamplerManager.cpp)**
+
+**Responsibility:** Manages diffusion sampling methods
+
+- Supports multiple samplers: Euler, Euler A, Heun, DPM2, DPM++ 2M, DPM++ 2S a, LCM
+- Configurable CFG scale, steps, seed
+- Scheduler selection (Karras, linear, etc.)
+
+#### **LoraManager (model-interface/LoraManager.cpp)**
+
+**Responsibility:** LoRA weight loading and application
+
+- Loads LoRA weights from safetensors/GGUF
+- Applies LoRA to UNet and text encoder
+- Supports multiple simultaneous LoRAs with configurable weights
+
+---
+
+## Bare Runtime Integration
+
+### Communication Pattern
+
+```mermaid
+sequenceDiagram
+    participant JS as JavaScript
+    participant IF as SdInterface
+    participant Bind as Native Binding
+    participant Addon as AddonCpp/AddonJs
+    participant Model as SdModel
+    participant SD as stable-diffusion.cpp
+    
+    JS->>IF: run(params)
+    IF->>Bind: runJob(handle, paramsJson)
+    Bind->>Addon: runJob(params) [lock mutex]
+    Addon->>Addon: Set job input
+    Addon->>Addon: cv.notify_one()
+    Bind-->>IF: accepted (boolean)
+    IF-->>JS: QvacResponse
+    
+    Note over Addon: Processing Thread
+    Addon->>Addon: Take job
+    Addon->>Addon: uv_async_send (JobStarted)
+    
+    loop For each diffusion step
+        Addon->>Model: process(params)
+        Model->>SD: sd_txt2img_step()
+        SD-->>Model: latents
+        Model->>Addon: progressCallback(step, total)
+        Addon->>Addon: Queue progress [lock]
+        Addon->>Addon: uv_async_send()
+    end
+    
+    Model->>SD: vae_decode()
+    SD-->>Model: pixel_data
+    Model->>Addon: outputCallback(image_data)
+    
+    Note over Addon: UV async callback
+    Addon->>Bind: jsOutputCallback()
+    Bind->>IF: outputCb('Output', jobId, image)
+    IF->>JS: Response emits image
+```
+
+<details>
+<summary>📊 LLM-Friendly: Thread Communication</summary>
+
+**Thread Responsibilities:**
+
+| Thread | Runs | Blocks On | Can Call |
+|--------|------|-----------|----------|
+| JavaScript | App code, callbacks | Nothing (event loop) | All JS, addon methods |
+| Processing | Diffusion steps | model.process() | model.*, uv_async_send() |
+
+**Synchronization Primitives:**
+
+| Primitive | Purpose | Held Duration | Risk |
+|-----------|---------|---------------|------|
+| std::mutex | Protect single job state | <1ms | Low (brief) |
+| std::condition_variable | Wake processing thread | N/A | None |
+| uv_async_t | Wake JS thread | N/A | None |
+
+**Thread Safety Rules:**
+
+1. ✅ Call addon methods from any thread (runJob, cancel, activate, destroyInstance)
+2. ✅ Processing thread calls model methods
+3. ❌ Don't call JS functions from the C++ processing thread (use uv_async_send to wake the JS thread instead)
+4. ❌ Don't call model methods from JS thread
+
+</details>
+
+---
+
+# Architecture Decisions
+
+## Decision 1: stable-diffusion.cpp as Inference Backend
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** stable-diffusion.cpp over Python diffusers, ONNX Runtime, and alternatives  
+**Why:** Pure C++ implementation, GGML-based (consistent with llama.cpp), broad model support, mature cross-platform GPU acceleration  
+**Cost:** Large binary size, C++ build complexity, API instability
+
+</details>
+
+### Context
+
+Need high-performance, cross-platform diffusion model inference for resource-constrained environments (laptops, mobile devices) with support for:
+- Various model architectures (SD1.x, SD2.x, SDXL, SD3, FLUX, Wan, etc.)
+- Quantization for reduced memory footprint
+- GPU acceleration on diverse hardware
+- Both image and video generation
+
+### Decision
+
+Use stable-diffusion.cpp as the core inference engine instead of Python diffusers, ONNX Runtime, or custom implementation.
+
+### Rationale
+
+**Performance:**
+- Pure C/C++ implementation for maximum performance
+- GGML-based tensor operations (same as llama.cpp, familiar ecosystem)
+- Supports quantization reducing memory by 2-8x
+- GPU acceleration via Metal (Apple), Vulkan (cross-platform), OpenCL (Android/Adreno)
+
+**Model Support:**
+- Comprehensive support for diffusion models:
+  - SD1.x, SD2.x, SD-Turbo
+  - SDXL, SDXL-Turbo
+  - SD3/SD3.5
+  - FLUX.1-dev/schnell, FLUX.2-dev/klein
+  - Wan2.1/Wan2.2 (video generation)
+  - Qwen Image, Z-Image
+- LoRA, ControlNet support
+- GGUF, safetensors, checkpoint format support
+
+**Development Velocity:**
+- Active development with regular releases
+- Community adding new model support rapidly
+- Mirrors llama.cpp architecture (familiar patterns)
+
+### Alternatives Considered
+
+1. **Python Diffusers (Hugging Face)**
+   - ✅ Comprehensive model support
+   - ✅ Easy to use
+   - ❌ Requires Python runtime
+   - ❌ Heavy memory footprint
+   - ❌ Poor mobile support
+   - ❌ Complex deployment
+
+2. **ONNX Runtime**
+   - ✅ Cross-platform
+   - ✅ Good mobile support
+   - ❌ Requires model conversion
+   - ❌ Limited quantization support
+   - ❌ No native LoRA/ControlNet support
+   - ❌ Complex pipeline orchestration
+
+3. **TensorRT (NVIDIA)**
+   - ✅ Excellent NVIDIA GPU performance
+   - ❌ NVIDIA-only (no AMD, Apple, mobile)
+   - ❌ Requires model compilation per GPU
+   - ❌ Large binary size
+
+4. **Core ML (Apple)**
+   - ✅ Excellent Apple device performance
+   - ❌ Apple-only
+   - ❌ Limited model support
+   - ❌ Requires model conversion
+
+**Why stable-diffusion.cpp Won:**
+- Broadest platform support (desktop + mobile, all major OSes)
+- Pure C++ with no external runtime dependencies
+- GGML integration (consistent with our llama.cpp stack)
+- Active development and growing model support
+- Multiple GPU backends in single codebase
+- Quantization support for memory efficiency
+
+---
+
+## Decision 2: Bare Runtime over Node.js
+
+See [qvac-lib-inference-addon-cpp Decision 4: Why Bare Runtime](https://github.com/tetherto/qvac-lib-inference-addon-cpp/blob/main/docs/architecture.md#decision-4-why-bare-runtime) for rationale.
+
+**Summary:** Mobile support (iOS/Android), lightweight, modern addon API. Core business logic remains runtime-agnostic.
+
+---
+
+## Decision 3: Disk-Local Model Files
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Require model files to already exist on disk at `diskPath`  
+**Why:** Simplicity — the addon loads files directly from disk, no streaming/download layer needed  
+**Cost:** Caller must ensure files are present before calling `load()`
+
+</details>
+
+### Context
+
+Diffusion models consist of multiple large files (diffusion model, text encoders, VAE). The addon needs these files to create the native `sd_ctx_t` context.
+
+Unlike the LLM addon, which historically used WeightsProvider to stream weights, the diffusion addon loads files directly from disk paths — no loader abstraction is involved.
+
+### Decision
+
+Require all model files to be present on disk at `diskPath` before `load()` is called. The addon constructs file paths by joining `diskPath` with each model filename and passes them directly to stable-diffusion.cpp.
+
+### Rationale
+
+**Simplicity:**
+- No download/streaming abstraction layer needed
+- No WeightsProvider, no progress tracking for downloads
+- Direct file paths to stable-diffusion.cpp
+
+**Split-model support:**
+- Diffusion models may have multiple components (diffusion GGUF, CLIP-L, CLIP-G, T5-XXL, LLM encoder, VAE)
+- All resolved as `path.join(diskPath, filename)` in `_load()`
+- Split vs all-in-one layout detected via heuristic (`isSplitLayout = !!llmModel || !!t5XxlModel`)
+
+### Trade-offs
+- ✅ Simple, no abstraction overhead
+- ✅ No streaming/buffering complexity
+- ❌ Caller responsible for ensuring files exist on disk
+
+---
+
+## Decision 4: Direct File Path Loading
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Pass file paths directly to stable-diffusion.cpp via `sd_ctx_params_t`  
+**Why:** stable-diffusion.cpp natively loads from file paths; no need for buffer intermediary  
+**Cost:** Files must exist on disk (no streaming from P2P sources)
+
+</details>
+
+### Context
+
+stable-diffusion.cpp accepts model files via file paths in its context parameters (`model_path`, `diffusion_model_path`, `clip_l_path`, `vae_path`, etc.). The addon constructs these paths from `diskPath` + filenames.
+
+### Decision
+
+Pass absolute file paths directly to stable-diffusion.cpp rather than using buffer-based loading. The `_load()` method constructs a `configurationParams` object with resolved paths and passes it to the native addon.
+
+### Rationale
+
+**Simplicity:**
+- stable-diffusion.cpp handles file I/O internally
+- No custom streambuf or buffer management needed
+- No JavaScript reference lifecycle concerns
+
+**Split-model routing:**
+- All-in-one checkpoints (SD1.x, SD2.x, SDXL) → `model_path`
+- Standalone diffusion GGUFs (FLUX.2, SD3 split) → `diffusion_model_path`
+- Separate encoders → `clipLPath`, `clipGPath`, `t5XxlPath`, `llmPath`
+- VAE → `vaePath`
+
+### Trade-offs
+- ✅ No buffer management complexity
+- ✅ stable-diffusion.cpp handles memory-mapped I/O efficiently
+- ❌ Cannot stream from P2P sources directly (files must be on disk first)
+
+---
+
+## Decision 5: Generation Parameters Format (JSON Serialization)
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Serialize generation parameters to JSON string before crossing JS/C++ boundary  
+**Why:** Simple marshalling, familiar pattern, extensible for new parameters  
+**Cost:** JSON parsing overhead per inference call
+
+</details>
+
+### Context
+
+Need to pass complex generation parameters from JavaScript to C++:
+- Prompt and negative prompt
+- Image dimensions (width, height)
+- Sampling parameters (steps, cfg_scale, sampler, seed)
+- Optional inputs (init image for img2img, LoRA configs, ControlNet)
+
+### Decision
+
+Serialize generation parameters to JSON string before passing to C++.
+
+### Rationale
+
+**Simplicity:**
+- Single string parameter instead of complex nested objects
+- JSON parsing well-supported in both JavaScript and C++
+- Consistent with llm-llamacpp pattern
+
+**Extensibility:**
+- Easy to add new parameters without changing C++ interface
+- Optional parameters naturally handled (absent = default)
+- LoRA configs, ControlNet settings as nested objects
+
+### Trade-offs
+- ✅ Portable and well-understood format
+- ❌ Serialization overhead on every call
+- ❌ No compile-time type checking across boundary
+
+### Parameter Schema
+
+```typescript
+interface GenerationParams {
+  prompt: string;
+  negative_prompt?: string;
+  width?: number;             // default: 512
+  height?: number;            // default: 512
+  steps?: number;             // default: 20
+  cfg_scale?: number;         // CFG scale (SD1/SD2/SDXL/SD3)
+  guidance?: number;          // Distilled guidance (FLUX.2)
+  sampling_method?: string;   // 'euler' | 'euler_a' | 'dpm++_2m' | etc.
+  scheduler?: string;         // 'default' | 'karras' | 'exponential' | etc.
+  seed?: number;              // -1 for random
+  batch_count?: number;       // default: 1
+  vae_tiling?: boolean;       // Enable VAE tiling (for large images)
+  cache_preset?: string;      // 'slow' | 'medium' | 'fast' | 'ultra'
+  init_image?: Uint8Array;    // PNG/JPEG bytes — if provided, runs img2img
+  strength?: number;          // img2img: 0.0 = keep source, 1.0 = full redraw
+}
+```
+
+Mode is determined automatically: if `init_image` is provided, runs img2img; otherwise txt2img.
+
+---
+
+## Decision 6: Exclusive Run Queue (index.js)
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Promise-based exclusive run queue using `_withExclusiveRun()` wrapper  
+**Why:** Ensure generation jobs complete without interruption (long-running operations)  
+**Cost:** One generation at a time per model instance
+
+</details>
+
+### Context
+
+Diffusion generation takes significant time (seconds to minutes). Without coordination, concurrent requests could interfere. The addon returns `false` (not accepted) if a job is already running.
+
+### Decision
+
+Implement JavaScript-level promise queue ensuring only one generation job runs at a time per model instance.
+
+### Rationale
+
+**Resource Management:**
+- GPU memory fully utilized during generation
+- No partial state from interrupted generations
+- Predictable VRAM usage
+
+**Progress Integrity:**
+- Step progress callbacks correspond to single job
+- No mixing of progress from concurrent requests
+
+### Trade-offs
+- ✅ Simple promise-based queue
+- ✅ Predictable execution order
+- ❌ One request at a time per instance
+- ❌ Long generations block subsequent requests
+
+**Mitigation:** For batch generation, use the `batch_count` parameter; for parallel jobs, create multiple model instances.
+
+---
+
+## Decision 7: TypeScript Definitions
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Hand-written TypeScript definitions (index.d.ts)  
+**Why:** Type safety, IDE support, API documentation  
+**Cost:** Manual maintenance, must keep in sync with implementation
+
+</details>
+
+### Context
+
+Developers expect TypeScript support for better IDE experience, autocomplete, and compile-time checking.
+
+### Decision
+
+Provide hand-written TypeScript definitions in `index.d.ts`.
+
+### Rationale
+
+**Developer Experience:**
+- IDE autocomplete for methods and parameters
+- Compile-time error checking
+- Clear parameter types for generation options
+
+**Documentation:**
+- Types serve as living API documentation
+- Clear contracts for all public methods
+
+### Trade-offs
+- ✅ Catch errors at compile time
+- ❌ Maintenance burden (must keep .d.ts in sync)
+
+---
+
+# Technical Debt
+
+### 1. Limited Error Context
+**Status:** C++ exceptions lose stack traces crossing JS boundary  
+**Issue:** Generic error messages make debugging difficult  
+**Root Cause:** Bare's `js.h` doesn't support error stacks  
+**Plan:** Implement structured error objects with error codes and context
+
+---
+
+**Last Updated:** 2026-03-11
diff --git a/packages/lib-infer-diffusion/docs/data-flows-detailed.md b/packages/lib-infer-diffusion/docs/data-flows-detailed.md
new file mode 100644
index 0000000000..75e9e33c19
--- /dev/null
+++ b/packages/lib-infer-diffusion/docs/data-flows-detailed.md
@@ -0,0 +1,167 @@
+# Detailed Data Flows
+
+This document contains detailed diagrams showing how data moves through the `@qvac/img-stable-diffusion-cpp` system.
+
+**Audience:** Developers debugging complex behavior, contributors understanding system interactions.
+
+> **⚠️ Note:** These detailed diagrams are intended for initial reference and can quickly become outdated as the codebase evolves. For exact debugging and deep understanding, regenerate diagrams from the actual code or trace through the implementation directly.
+
+<details>
+<summary>⚡ TL;DR: Data Flow Overview</summary>
+
+**Communication Pattern:**
+- Two-thread architecture: JavaScript thread + dedicated C++ processing thread
+- Synchronization via mutex and condition variables
+- Cross-thread flow: JS → submit job via `runJob(params)` → wake C++ → process diffusion steps → output → uv_async_send → JS callback
+
+**Generation Path:**
+- JS calls `model.run(params)` → returns QvacResponse immediately (non-blocking)
+- JS serializes params to JSON, calls `addon.runJob(paramsJson)` once; returns boolean (accepted or job already active)
+- C++ single-job runner takes the job, executes diffusion loop → generates image
+- Queues progress/output events → triggers JS callback asynchronously
+- Emits: JobStarted, StepProgress, Output (final image), JobEnded, Error
+
+</details>
+
+## Table of Contents
+
+- [Text-to-Image Generation Flow](#text-to-image-generation-flow)
+
+---
+
+## Text-to-Image Generation Flow
+
+### High-Level Flow
+
+```mermaid
+flowchart TD
+    Start([JS: model.run]) --> ParseParams[Parse generation params]
+    ParseParams --> SerializeJSON[Serialize to JSON]
+    
+    SerializeJSON --> RunJob["addon.runJob(paramsJson)"]
+    RunJob --> CreateResp[Create QvacResponse]
+    CreateResp --> ReturnJS([Return to JavaScript])
+    
+    RunJob -.->|Enters native| LockMutex[Lock mutex]
+    LockMutex --> SetJob[Set single job input]
+    SetJob --> NotifyCV[Notify condition variable]
+    NotifyCV --> UnlockMutex[Unlock mutex]
+    
+    NotifyCV -.->|Wakes| ProcThread[Processing Thread]
+    
+    ProcThread --> WaitWork{Has work?}
+    WaitWork -->|No| SleepCV[cv.wait]
+    SleepCV --> WaitWork
+    
+    WaitWork -->|Yes| LockProc[Lock mutex]
+    LockProc --> TakeJob[Take job input]
+    TakeJob --> UnlockProc[Unlock mutex]
+    UnlockProc --> EmitStart[Queue JobStarted event]
+    EmitStart --> SendAsync1[uv_async_send]
+    
+    SendAsync1 --> ParseJSON[Parse JSON params]
+    ParseJSON --> EncodePrompt["Encode prompt (CLIP)"]
+    EncodePrompt --> EncodeNeg[Encode negative prompt]
+    EncodeNeg --> InitLatents["Initialize random latents (seed)"]
+    
+    InitLatents --> DiffusionLoop{Diffusion Loop}
+    DiffusionLoop -->|Continue| PredictNoise[UNet predict noise]
+    PredictNoise --> ApplyCFG[Apply CFG guidance]
+    ApplyCFG --> SchedulerStep[Scheduler step]
+    SchedulerStep --> QueueProgress[Queue StepProgress event]
+    QueueProgress --> SendAsync2[uv_async_send]
+    SendAsync2 --> DiffusionLoop
+    
+    DiffusionLoop -->|Complete| VAEDecode[VAE decode]
+    VAEDecode --> EncodePNG[Encode to PNG]
+    EncodePNG --> QueueOutput[Queue Output event]
+    QueueOutput --> GetStats[Collect runtime stats]
+    GetStats --> QueueJobEnd[Queue JobEnded event]
+    QueueJobEnd --> SendAsync3[uv_async_send]
+    SendAsync3 --> ProcThread
+    
+    DiffusionLoop -->|Error| QueueError[Queue Error event]
+    QueueError --> ResetModel[model.reset]
+    ResetModel --> SendAsync3
+    
+    SendAsync2 -.->|Triggers| UVCallback[UV async callback]
+    UVCallback --> LockCB[Lock output mutex]
+    LockCB --> DequeueOutputs[Dequeue all outputs]
+    DequeueOutputs --> UnlockCB[Unlock mutex]
+    UnlockCB --> ForEach[For each output event]
+    
+    ForEach --> InvokeJS[Call JavaScript outputCb]
+    InvokeJS --> UpdateResponse[QvacResponse emits]
+    UpdateResponse --> ProgressYield([onStep callback / await])
+```
+
+<details>
+<summary>📊 LLM-Friendly: Generation Flow Breakdown</summary>
+
+**Phase 1: Job Submission (JavaScript → C++)**
+
+| Step | Thread | Duration | Operation | Blocking? |
+|------|--------|----------|-----------|-----------|
+| 1 | JS | <0.1ms | Parse params | No |
+| 2 | JS | <0.1ms | Serialize to JSON | No |
+| 3 | JS | <1ms | Call addon.runJob(params) | No |
+| 4 | JS | <0.1ms | Lock mutex | No |
+| 5 | JS | <0.1ms | Set job input | No |
+| 6 | JS | <0.1ms | Signal CV | No |
+| 7 | JS | <0.1ms | Unlock mutex | No |
+| 8 | JS | <0.1ms | Return accepted (boolean) | No |
+| 9 | C++ | - | Wake from cv.wait() | - |
+
+**Phase 2: Processing (C++ Background Thread)**
+
+| Step | Thread | Duration | Operation | Blocks JS? |
+|------|--------|----------|-----------|------------|
+| 10 | C++ | <0.1ms | Lock mutex | No |
+| 11 | C++ | <0.1ms | Take job input | No |
+| 12 | C++ | <0.1ms | Unlock mutex | No |
+| 13 | C++ | <1ms | Parse JSON params | No |
+| 14 | C++ | 50-200ms | Encode prompts (CLIP) | No |
+| 15 | C++ | <10ms | Initialize latents | No |
+| 16 | C++ | 100-500ms per step | UNet inference | No |
+| 17 | C++ | 200-1000ms | VAE decode | No |
+| 18 | C++ | 10-50ms | PNG encode | No |
+
+**Phase 3: Output Delivery (C++ → JavaScript)**
+
+| Step | Thread | Duration | Operation | Details |
+|------|--------|----------|-----------|---------|
+| 19 | C++ | <0.1ms | Lock output mutex | Per step |
+| 20 | C++ | <0.1ms | Queue progress | Per step |
+| 21 | C++ | <0.1ms | Unlock mutex | Per step |
+| 22 | C++ | <0.1ms | uv_async_send() | May coalesce |
+| 23 | JS | - | UV schedules callback | Next tick |
+| 24 | JS | <0.1ms | Lock mutex | Batch |
+| 25 | JS | <0.1ms | Drain outputs | Batch |
+| 26 | JS | <0.1ms | Unlock mutex | Batch |
+| 27 | JS | Varies | Invoke outputCb | User code |
+
+**Event Types:**
+
+| Event | When | Data | Purpose |
+|-------|------|------|---------|
+| JobStarted | Processing begins | {jobId, timestamp} | Track start |
+| StepProgress | Each diffusion step | {jobId, step, totalSteps} | Progress UI |
+| Output | Generation complete | {jobId, image: Uint8Array, format: 'png'} | Final image |
+| JobEnded | All processing done | {jobId, stats: RuntimeStats} | Track completion |
+| Error | Processing fails | {jobId, error: string} | Error handling |
+
+**Performance Characteristics:**
+
+- Job queueing: <1ms total
+- Prompt encoding: 50-200ms (depends on prompt length)
+- Diffusion steps: 100-500ms per step (model and GPU dependent)
+- VAE decoding: 200-1000ms (resolution dependent)
+- Total 512x512, 20 steps: ~5-15 seconds
+- Total 1024x1024, 20 steps: ~15-60 seconds
+
+</details>
+
+**Related Documents:**
+- [architecture.md](architecture.md) - Complete architecture documentation
+
+**Last Updated:** 2026-03-11
diff --git a/packages/lib-infer-diffusion/examples/generate-image-sd2.js b/packages/lib-infer-diffusion/examples/generate-image-sd2.js
new file mode 100644
index 0000000000..c0ddb790bd
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/generate-image-sd2.js
@@ -0,0 +1,129 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model file — downloaded via: ./scripts/download-model-sd2.sh
+//
+// SD2.1 all-in-one GGUF (Q8_0). No separate text encoder or VAE needed.
+// Source: gpustack/stable-diffusion-v2-1-GGUF (public, no login required).
+//
+// prediction: 'v' is set explicitly in the config because even though this is
+// a GGUF, the gpustack conversion does not always embed the prediction type KV.
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models') // passed to the model as diskPath
+const OUTPUT_DIR = path.resolve(__dirname, '../output') // created by main() if missing
+
+const MODEL_NAME = 'stable-diffusion-v2-1-Q8_0.gguf'
+
+// ---------------------------------------------------------------------------
+// Generation params — edit freely
+// SD2.1 is trained at 768×768; 512×512 works but looks softer.
+// cfg_scale 7–9 is the typical range; guidance (FLUX-specific) is not used.
+// ---------------------------------------------------------------------------
+const PROMPT = [
+  'an elegant flower in a glass vase, watercolor painting, with textures around the leaves'
+].join(' ')
+
+const NEGATIVE_PROMPT = 'blurry, low quality, watermark, text, bad anatomy'
+
+const STEPS = 5 // NOTE(review): SD2.1 benefits from more steps than FLUX distilled, yet 5 is very low — confirm intended
+const WIDTH = 768 // native training resolution for SD2.1
+const HEIGHT = 768 // square output, same as WIDTH
+const CFG = 7.5 // classifier-free guidance scale, sent as cfg_scale
+const SEED = -1 // -1 = random
+
+async function main () { // load the model, run one generation, save the images, always unload
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log('Stable Diffusion 2.1 — text-to-image inference')
+  console.log('================================================')
+  console.log('Model  :', MODEL_NAME)
+  console.log('Prompt :', PROMPT)
+  console.log('Steps  :', STEPS)
+  console.log('Size   :', `${WIDTH}x${HEIGHT}`)
+  console.log('CFG    :', CFG)
+  console.log('Seed   :', SEED)
+  console.log()
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME
+      // No llmModel — SD2.1 uses the CLIP text encoder baked into the checkpoint.
+      // No vaeModel — the VAE is baked into the checkpoint.
+    },
+    {
+      threads: 8,
+      // SD2.1 uses v-prediction. The gpustack GGUF conversion does not always
+      // embed the prediction-type KV, so set it explicitly instead of auto-detecting.
+      prediction: 'v'
+    }
+  )
+
+  try {
+    // ── 1. Load weights ───────────────────────────────────────────────────────
+    console.log('Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // ── 2. Start generation ───────────────────────────────────────────────────
+    console.log('Starting generation...')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      cfg_scale: CFG, // SD1.x / SD2.x CFG — not the FLUX distilled 'guidance'
+      seed: SEED
+    })
+
+    // ── 3. Stream progress + collect image bytes ──────────────────────────────
+    const images = []
+
+    await response
+      .onUpdate((data) => { // Uint8Array updates are images; string updates are JSON progress ticks
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // best-effort progress: swallow any error from parsing/printing a tick
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`\nGenerated in ${((Date.now() - tGen) / 1000).toFixed(1)}s`)
+    console.log(`Got ${images.length} image(s)`)
+
+    // ── 4. Save each image to disk ────────────────────────────────────────────
+    for (let i = 0; i < images.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `sd2_seed${SEED}_${i}.png`)
+      fs.writeFileSync(outPath, images[i])
+      console.log(`Saved → ${outPath}`)
+    }
+  } finally { // runs whether or not load/run/save threw
+    console.log('\nUnloading model...')
+    await model.unload()
+    console.log('Done.')
+  }
+}
+
+main().catch(err => { // any rejection from main(): print it and exit with status 1
+  console.error('Fatal:', err.message || err)
+  process.exit(1)
+})
diff --git a/packages/lib-infer-diffusion/examples/generate-image-sd3.js b/packages/lib-infer-diffusion/examples/generate-image-sd3.js
new file mode 100644
index 0000000000..fcd6a8788f
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/generate-image-sd3.js
@@ -0,0 +1,138 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model file — downloaded via: ./scripts/download-model-sd3.sh
+//
+// sd3_medium_incl_clips.safetensors: official Stability AI safetensors from
+// adamo1139/stable-diffusion-3-medium-ungated (ungated public mirror).
+// Contains the diffusion model + CLIP-L + CLIP-G text encoders in one file.
+// No separate encoder paths needed.
+//
+// NOTE: The gpustack GGUF variants (stable-diffusion-v3-medium-*.gguf) have
+// zero KV metadata pairs and are NOT compatible with standard stable-diffusion.cpp.
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models') // passed to the model as diskPath
+const OUTPUT_DIR = path.resolve(__dirname, '../output') // created by main() if missing
+
+// All-in-one safetensors — diffusion + CLIP-L + CLIP-G:
+const MODEL_NAME = 'sd3_medium_incl_clips.safetensors'
+
+// ---------------------------------------------------------------------------
+// Generation params
+// SD3 Medium uses flow-matching. cfg_scale 4.5–7.0 is the typical range.
+// 512×512 works fine; SD3 was trained at 1024×1024 but smaller is faster.
+// ---------------------------------------------------------------------------
+const PROMPT = [
+  'a majestic red fox standing in a snowy forest at dusk,',
+  'soft golden light through the pine trees,',
+  'photorealistic, 8k, detailed fur'
+].join(' ')
+
+const NEGATIVE_PROMPT = 'blurry, low quality, watermark, text, bad anatomy'
+
+const STEPS = 28 // SD3 Medium typically 20–30 steps
+const WIDTH = 512 // below the 1024×1024 training resolution; smaller is faster
+const HEIGHT = 512 // square output, same as WIDTH
+const CFG = 5.0 // sent as cfg_scale; within SD3's typical 4.5–7.0 range, lower than SD1/SD2
+const SEED = 42 // fixed seed; set to -1 for random
+
+async function main () { // load the model, run one generation, save the images, always unload
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log('Stable Diffusion 3 Medium — text-to-image inference')
+  console.log('=====================================================')
+  console.log('Model  :', MODEL_NAME)
+  console.log('Prompt :', PROMPT)
+  console.log('Steps  :', STEPS)
+  console.log('Size   :', `${WIDTH}x${HEIGHT}`)
+  console.log('CFG    :', CFG)
+  console.log('Seed   :', SEED)
+  console.log()
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME
+      // All-in-one safetensors: no clipLModel, clipGModel, t5XxlModel, or vaeModel.
+      //
+      // To add T5-XXL (better text following) without redownloading the main file:
+      //   t5XxlModel: 't5xxl_fp8_e4m3fn.safetensors'   // download via download-model-sd3.sh
+    },
+    {
+      threads: 4,
+      // SD3 uses flow-matching. The safetensors metadata allows auto-detection,
+      // but we set these explicitly as safety overrides.
+      prediction: 'flow', // FLOW_PRED — SD3 flow-matching
+      flow_shift: '3.0' // SD3 Medium default; overrides INFINITY sentinel. NOTE(review): string, not number — confirm
+    }
+  )
+
+  try {
+    // ── 1. Load weights ───────────────────────────────────────────────────────
+    console.log('Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // ── 2. Start generation ───────────────────────────────────────────────────
+    console.log('Starting generation...')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      cfg_scale: CFG, // SD3 CFG — not the FLUX distilled 'guidance'
+      sampling_method: 'euler', // SD3 flow-matching requires euler (not euler_a)
+      seed: SEED
+    })
+
+    // ── 3. Stream progress + collect image bytes ──────────────────────────────
+    const images = []
+
+    await response
+      .onUpdate((data) => { // Uint8Array updates are images; string updates are JSON progress ticks
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // best-effort progress: swallow any error from parsing/printing a tick
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`\nGenerated in ${((Date.now() - tGen) / 1000).toFixed(1)}s`)
+    console.log(`Got ${images.length} image(s)`)
+
+    // ── 4. Save each image to disk ────────────────────────────────────────────
+    for (let i = 0; i < images.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `sd3_seed${SEED}_${i}.png`)
+      fs.writeFileSync(outPath, images[i])
+      console.log(`Saved → ${outPath}`)
+    }
+  } finally { // runs whether or not load/run/save threw
+    console.log('\nUnloading model...')
+    await model.unload()
+    console.log('Done.')
+  }
+}
+
+main().catch(err => { // any rejection from main(): print it and exit with status 1
+  console.error('Fatal:', err.message || err)
+  process.exit(1)
+})
diff --git a/packages/lib-infer-diffusion/examples/generate-image-sdxl.js b/packages/lib-infer-diffusion/examples/generate-image-sdxl.js
new file mode 100644
index 0000000000..ea730dd00f
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/generate-image-sdxl.js
@@ -0,0 +1,131 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model file — downloaded via: ./scripts/download-model-sdxl.sh
+//
+// SDXL base all-in-one GGUF (Q4_0). CLIP-L, CLIP-G, UNet, and VAE are all
+// baked in — no separate text encoder or VAE needed.
+//
+// prediction is left unset: SDXL uses eps-prediction and the gpustack GGUF
+// has the metadata embedded, so auto-detection works correctly.
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models') // passed to the model as diskPath
+const OUTPUT_DIR = path.resolve(__dirname, '../output') // created by main() if missing
+
+const MODEL_NAME = 'stable-diffusion-xl-base-1.0-Q4_0.gguf'
+
+// ---------------------------------------------------------------------------
+// Generation params
+// SDXL is trained at 1024×1024 but 512×512 works and is significantly faster.
+// Use cfg_scale (not guidance — that is FLUX-specific).
+// ---------------------------------------------------------------------------
+const PROMPT = [
+  'a majestic red fox standing in a snowy forest at dusk,',
+  'soft golden light through the pine trees,',
+  'photorealistic, 8k, detailed fur'
+].join(' ')
+
+const NEGATIVE_PROMPT = 'blurry, low quality, watermark, text, bad anatomy'
+
+const STEPS = 30
+const WIDTH = 1024 // SDXL training resolution; 512 is significantly faster
+const HEIGHT = 1024 // square output, same as WIDTH
+const CFG = 6.5 // sent as cfg_scale
+const SEED = 15 // fixed seed; set to -1 for random
+
+async function main () { // load the model, run one generation, save the images, always unload
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log('Stable Diffusion XL Base 1.0 — text-to-image inference')
+  console.log('========================================================')
+  console.log('Model  :', MODEL_NAME)
+  console.log('Prompt :', PROMPT)
+  console.log('Steps  :', STEPS)
+  console.log('Size   :', `${WIDTH}x${HEIGHT}`)
+  console.log('CFG    :', CFG)
+  console.log('Seed   :', SEED)
+  console.log()
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME
+      // No llmModel — SDXL uses CLIP-L + CLIP-G baked into the checkpoint.
+      // No vaeModel — the VAE is baked into the checkpoint.
+    },
+    {
+      threads: 4
+      // No prediction override — SDXL uses eps-prediction and the GGUF
+      // has the correct metadata for auto-detection.
+    }
+  )
+
+  try {
+    // ── 1. Load weights ───────────────────────────────────────────────────────
+    console.log('Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // ── 2. Start generation ───────────────────────────────────────────────────
+    console.log('Starting generation...')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      cfg_scale: CFG,
+      seed: SEED
+      // vae_tiling: true  — uncomment if VAE decode fails at larger resolutions
+    })
+
+    // ── 3. Stream progress + collect image bytes ──────────────────────────────
+    const images = []
+
+    await response
+      .onUpdate((data) => { // Uint8Array updates are images; string updates are JSON progress ticks
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // best-effort progress: swallow any error from parsing/printing a tick
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`\nGenerated in ${((Date.now() - tGen) / 1000).toFixed(1)}s`)
+    console.log(`Got ${images.length} image(s)`)
+
+    // ── 4. Save each image to disk ────────────────────────────────────────────
+    for (let i = 0; i < images.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `sdxl_seed${SEED}_${i}.png`)
+      fs.writeFileSync(outPath, images[i])
+      console.log(`Saved → ${outPath}`)
+    }
+  } finally { // runs whether or not load/run/save threw
+    console.log('\nUnloading model...')
+    await model.unload()
+    console.log('Done.')
+  }
+}
+
+main().catch(err => { // any rejection from main(): print it and exit with status 1
+  console.error('Fatal:', err.message || err)
+  process.exit(1)
+})
diff --git a/packages/lib-infer-diffusion/examples/generate-image.js b/packages/lib-infer-diffusion/examples/generate-image.js
new file mode 100644
index 0000000000..05ac42c39b
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/generate-image.js
@@ -0,0 +1,118 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model files — downloaded via: ./scripts/download-model.sh
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models')
+const OUTPUT_DIR = path.resolve(__dirname, '../output')
+
+const MODEL_NAME = 'flux-2-klein-4b-Q8_0.gguf'
+const LLM_MODEL = 'Qwen3-4B-Q4_K_M.gguf'
+const VAE_MODEL = 'flux2-vae.safetensors'
+
+// ---------------------------------------------------------------------------
+// Generation params — edit freely
+// ---------------------------------------------------------------------------
+const PROMPT = [
+  'a majestic red fox standing in a snowy forest at dusk,',
+  'soft golden light through the pine trees,',
+  'photorealistic, 8k, detailed fur'
+].join(' ')
+
+const STEPS = 20
+const WIDTH = 512
+const HEIGHT = 512
+const GUIDANCE = 3.5 // distilled guidance scale for FLUX.2
+const SEED = 42 // -1 = random
+
+async function main () { // loads the FLUX.2 [klein] files, runs one generation, saves every returned image, always unloads
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log('FLUX.2 [klein] 4B — text-to-image inference')
+  console.log('============================================')
+  console.log('Prompt :', PROMPT)
+  console.log('Steps  :', STEPS)
+  console.log('Size   :', `${WIDTH}x${HEIGHT}`)
+  console.log('Seed   :', SEED)
+  console.log()
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME,
+      llmModel: LLM_MODEL,
+      vaeModel: VAE_MODEL
+    },
+    {
+      threads: 4
+    }
+  )
+
+  try {
+    // ── 1. Load weights ───────────────────────────────────────────────────────
+    console.log('Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // ── 2. Start generation ───────────────────────────────────────────────────
+    console.log('Starting generation...')
+    const tGen = Date.now() // generation timer starts before run() is even accepted
+
+    const response = await model.run({
+      prompt: PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      guidance: GUIDANCE,
+      seed: SEED
+    })
+
+    // ── 3. Stream progress + collect image bytes ──────────────────────────────
+    const images = []
+
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          // PNG-encoded output image
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one filled per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`) // `\r` redraws the bar on the same line
+            }
+          } catch (_) {} // string updates that are not valid JSON are skipped silently
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`\nGenerated in ${((Date.now() - tGen) / 1000).toFixed(1)}s`)
+    console.log(`Got ${images.length} image(s)`)
+
+    // ── 4. Save each image to disk ────────────────────────────────────────────
+    for (let i = 0; i < images.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `output_seed${SEED}_${i}.png`)
+      fs.writeFileSync(outPath, images[i])
+      console.log(`Saved → ${outPath}`)
+    }
+  } finally { // runs on success and on failure, including a failed load()
+    console.log('\nUnloading model...')
+    await model.unload()
+    console.log('Done.')
+  }
+}
+
+main().catch(err => {
+  console.error('Fatal:', err?.message || err) // `?.` keeps a null/undefined rejection from throwing inside this handler
+  process.exit(1) // non-zero status so shells and CI see the failure
+})
diff --git a/packages/lib-infer-diffusion/examples/load-model.js b/packages/lib-infer-diffusion/examples/load-model.js
new file mode 100644
index 0000000000..cb15e90e06
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/load-model.js
@@ -0,0 +1,62 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model files — must have been downloaded first via:
+//   ./scripts/download-model.sh
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models')
+
+const MODEL_NAME = 'flux-2-klein-4b-Q8_0.gguf'
+const LLM_MODEL = 'Qwen3-4B-Q4_K_M.gguf'
+const VAE_MODEL = 'flux2-vae.safetensors'
+
+// Load/unload example: print the configuration, load the weights, then always unload.
+async function main () {
+  console.log('FLUX.2 [klein] 4B — load/unload example')
+  console.log('========================================')
+  console.log('Models dir :', MODELS_DIR)
+  console.log('Model      :', MODEL_NAME)
+  console.log('LLM encoder:', LLM_MODEL)
+  console.log('VAE        :', VAE_MODEL)
+  console.log()
+
+  // Step 1 — describe the weight files; nothing is loaded until load() is called.
+  const fileArgs = {
+    logger: console,
+    diskPath: MODELS_DIR,
+    modelName: MODEL_NAME,
+    llmModel: LLM_MODEL,
+    vaeModel: VAE_MODEL
+  }
+  const sd = new ImgStableDiffusion(fileArgs, {
+    threads: 8 // CPU thread count passed through as SD context config
+  })
+
+  try {
+    // Step 2 — load() is where the addon gets created and activated.
+    console.log('Loading model weights (this takes a moment)...')
+    const startedAt = Date.now()
+    await sd.load()
+    const loadSeconds = (Date.now() - startedAt) / 1000
+    console.log(`Model loaded in ${loadSeconds.toFixed(1)}s`)
+    console.log()
+
+    // Step 3 — placeholder: inference calls would go here.
+    console.log('Model is ready. (No inference in this example.)')
+    console.log()
+  } finally {
+    // Step 4 — runs on success and on failure, so unload() is always attempted.
+    console.log('Unloading model...')
+    await sd.unload()
+    console.log('Done — all resources released.')
+  }
+}
+
+main().catch(err => {
+  console.error('Fatal:', err?.message || err) // `?.` keeps a null/undefined rejection from throwing inside this handler
+  process.exit(1) // non-zero status so shells and CI see the failure
+})
diff --git a/packages/lib-infer-diffusion/examples/quickstart.js b/packages/lib-infer-diffusion/examples/quickstart.js
new file mode 100644
index 0000000000..b1e9fa910b
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/quickstart.js
@@ -0,0 +1,139 @@
+'use strict'
+
+/**
+ * Stable Diffusion Quickstart Example
+ *
+ * Generate an image from a text prompt using SD2.1.
+ *
+ * Prerequisites: ./scripts/download-model-sd2.sh
+ * Usage: bare examples/quickstart.js
+ */
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const binding = require('../binding')
+const ImgStableDiffusion = require('../index')
+
+const MODELS_DIR = path.resolve(__dirname, '../models')
+const OUTPUT_DIR = path.resolve(__dirname, '../output')
+
+const MODEL_NAME = 'stable-diffusion-v2-1-Q8_0.gguf'
+const PROMPT = 'a cozy cabin in a snowy mountain landscape at sunset, warm light from windows, photorealistic'
+const NEGATIVE_PROMPT = 'blurry, low quality, watermark, text'
+
+async function main () { // SD2.1 quickstart: hook the native logger, verify the model file, generate, save, print stats
+  console.log('=== Stable Diffusion Quickstart ===\n')
+
+  // 1. Setup native logger
+  const LOG_PRIORITIES = ['ERROR', 'WARNING', 'INFO', 'DEBUG']
+  binding.setLogger((priority, message) => {
+    const label = LOG_PRIORITIES[priority] || `UNKNOWN(${priority})`
+    console.log(`[C++ ${label}] ${message}`)
+  })
+
+  // 2. Validate model exists
+  const modelPath = path.join(MODELS_DIR, MODEL_NAME)
+  if (!fs.existsSync(modelPath)) {
+    console.error(`Model not found: ${modelPath}`)
+    console.error('Run: ./scripts/download-model-sd2.sh')
+    binding.releaseLogger()
+    process.exit(1) // a missing model is a failure: exit non-zero instead of resolving as if the run succeeded
+  }
+
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log(`Model : ${MODEL_NAME}`)
+  console.log(`Prompt: ${PROMPT}\n`)
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME,
+      opts: { stats: true }
+    },
+    {
+      threads: 4,
+      prediction: 'v',
+      verbosity: 2
+    }
+  )
+
+  try {
+    // 3. Load model weights
+    console.log('1. Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`   Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // 4. Generate image
+    console.log('2. Generating image...')
+    const images = []
+
+    const response = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: 20,
+      width: 512,
+      height: 512,
+      cfg_scale: 7.5,
+      seed: 42
+    })
+
+    let stats = null
+    response.on('stats', (s) => { stats = s })
+
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one filled per 5%
+              process.stdout.write(`\r   [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // string updates that are not valid JSON are skipped silently
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`   Got ${images.length} image(s)\n`)
+
+    // 5. Save output
+    console.log('3. Saving output...')
+    for (let i = 0; i < images.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `quickstart_${i}.png`)
+      fs.writeFileSync(outPath, images[i])
+      console.log(`   Saved → ${outPath}`)
+    }
+
+    // 6. Print runtime stats
+    if (!stats) stats = response.stats // fall back to the property when the 'stats' event did not fire
+    if (stats) {
+      console.log('\n4. Runtime Stats:')
+      for (const [key, value] of Object.entries(stats)) {
+        const formatted = typeof value === 'number'
+          ? (Number.isInteger(value) ? String(value) : value.toFixed(4))
+          : String(value)
+        console.log(`   ${key}: ${formatted}`)
+      }
+    }
+  } finally {
+    // 7. Cleanup
+    console.log('\n5. Cleaning up...')
+    await model.unload()
+    try { binding.releaseLogger() } catch (_) {} // best-effort: unload() releases it too, and a throw here would mask an earlier error
+    console.log('\nDone!')
+  }
+}
+
+main().catch(err => {
+  console.error('Error:', err?.message || err) // `?.` keeps a null/undefined rejection from throwing inside this handler
+  try { binding.releaseLogger() } catch (_) {} // best-effort: main() may already have released it; must not block the exit below
+  process.exit(1)
+})
diff --git a/packages/lib-infer-diffusion/examples/runtime-stats-sd2.js b/packages/lib-infer-diffusion/examples/runtime-stats-sd2.js
new file mode 100644
index 0000000000..8547fac071
--- /dev/null
+++ b/packages/lib-infer-diffusion/examples/runtime-stats-sd2.js
@@ -0,0 +1,183 @@
+'use strict'
+
+const path = require('bare-path')
+const process = require('bare-process')
+const fs = require('bare-fs')
+const ImgStableDiffusion = require('../index')
+
+// ---------------------------------------------------------------------------
+// Model file — downloaded via: ./scripts/download-model-sd2.sh
+// ---------------------------------------------------------------------------
+const MODELS_DIR = path.resolve(__dirname, '../models')
+const OUTPUT_DIR = path.resolve(__dirname, '../output')
+
+const MODEL_NAME = 'stable-diffusion-v2-1-Q8_0.gguf'
+
+// ---------------------------------------------------------------------------
+// Generation params
+// ---------------------------------------------------------------------------
+const PROMPT = [
+  'a majestic red fox standing in a snowy forest at dusk,',
+  'soft golden light through the pine trees,',
+  'photorealistic, 8k, detailed fur'
+].join(' ')
+
+const NEGATIVE_PROMPT = 'blurry, low quality, watermark, text, bad anatomy'
+
+const STEPS = 20
+const WIDTH = 512
+const HEIGHT = 512
+const CFG = 7.5
+const SEED = 42
+
+// Print a titled, key-aligned dump of a stats object; non-integer numbers get 4 decimals.
+function printStats (label, stats) {
+  // Clamp the rule width: String.prototype.repeat throws RangeError on a negative count.
+  console.log(`\n── ${label} ${'─'.repeat(Math.max(0, 60 - label.length))}`)
+  if (!stats || typeof stats !== 'object') {
+    console.log('  (no stats available)')
+    return
+  }
+  const entries = Object.entries(stats)
+  const maxLen = Math.max(0, ...entries.map(([key]) => key.length))
+  for (const [key, value] of entries) {
+    const isFloat = typeof value === 'number' && !Number.isInteger(value)
+    console.log(`  ${key.padEnd(maxLen)}  ${isFloat ? value.toFixed(4) : String(value)}`)
+  }
+}
+
+async function main () { // runs two generations on one loaded model and prints the stats object reported after each
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+
+  console.log('Stable Diffusion 2.1 — RuntimeStats Example')
+  console.log('=============================================')
+  console.log('Model  :', MODEL_NAME)
+  console.log('Prompt :', PROMPT)
+  console.log('Steps  :', STEPS)
+  console.log('Size   :', `${WIDTH}x${HEIGHT}`)
+  console.log('CFG    :', CFG)
+  console.log('Seed   :', SEED)
+  console.log()
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: MODELS_DIR,
+      modelName: MODEL_NAME,
+      opts: { stats: true } // NOTE(review): presumably what enables the 'stats' event used below — confirm in BaseInference
+    },
+    {
+      threads: 4,
+      prediction: 'v'
+    }
+  )
+
+  try {
+    // ── 1. Load weights ─────────────────────────────────────────────────────
+    console.log('Loading model weights...')
+    const tLoad = Date.now()
+    await model.load()
+    console.log(`Loaded in ${((Date.now() - tLoad) / 1000).toFixed(1)}s\n`)
+
+    // ── 2. First generation ─────────────────────────────────────────────────
+    console.log('Starting generation 1/2...')
+    const images1 = []
+
+    const response1 = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      cfg_scale: CFG,
+      seed: SEED
+    })
+
+    let stats1 = null
+    response1.on('stats', (s) => { stats1 = s })
+
+    await response1
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images1.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one filled per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // string updates that are not valid JSON are skipped silently
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`Got ${images1.length} image(s)`)
+
+    if (!stats1) stats1 = response1.stats // fall back to the property when the 'stats' event did not fire
+    printStats('Generation 1 — RuntimeStats', stats1)
+
+    for (let i = 0; i < images1.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `sd2_stats_gen1_${i}.png`)
+      fs.writeFileSync(outPath, images1[i])
+      console.log(`Saved → ${outPath}`)
+    }
+
+    // ── 3. Second generation (cumulative stats) ─────────────────────────────
+    console.log('\nStarting generation 2/2 (different seed)...')
+    const images2 = []
+
+    const response2 = await model.run({
+      prompt: PROMPT,
+      negative_prompt: NEGATIVE_PROMPT,
+      steps: STEPS,
+      width: WIDTH,
+      height: HEIGHT,
+      cfg_scale: CFG,
+      seed: 123 // only the seed differs from the first run
+    })
+
+    let stats2 = null
+    response2.on('stats', (s) => { stats2 = s })
+
+    await response2
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images2.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              const pct = Math.round((tick.step / tick.total) * 100)
+              const bar = '█'.repeat(Math.floor(pct / 5)).padEnd(20, '░') // 20 cells, one filled per 5%
+              process.stdout.write(`\r  [${bar}] ${tick.step}/${tick.total} steps`)
+            }
+          } catch (_) {} // string updates that are not valid JSON are skipped silently
+        }
+      })
+      .await()
+
+    process.stdout.write('\n')
+    console.log(`Got ${images2.length} image(s)`)
+
+    if (!stats2) stats2 = response2.stats // fall back to the property when the 'stats' event did not fire
+    printStats('Generation 2 — Cumulative RuntimeStats', stats2)
+
+    for (let i = 0; i < images2.length; i++) {
+      const outPath = path.join(OUTPUT_DIR, `sd2_stats_gen2_${i}.png`)
+      fs.writeFileSync(outPath, images2[i])
+      console.log(`Saved → ${outPath}`)
+    }
+  } finally { // runs on success and on failure, including a failed load()
+    console.log('\nUnloading model...')
+    await model.unload()
+    console.log('Done.')
+  }
+}
+
+main().catch(err => {
+  console.error('Fatal:', err?.message || err) // `?.` keeps a null/undefined rejection from throwing inside this handler
+  process.exit(1) // non-zero status so shells and CI see the failure
+})
diff --git a/packages/lib-infer-diffusion/index.d.ts b/packages/lib-infer-diffusion/index.d.ts
new file mode 100644
index 0000000000..4e6e992a3e
--- /dev/null
+++ b/packages/lib-infer-diffusion/index.d.ts
@@ -0,0 +1,233 @@
+import BaseInference from '@qvac/infer-base/WeightsProvider/BaseInference'
+import type { QvacResponse } from '@qvac/infer-base'
+import type QvacLogger from '@qvac/logging'
+
+export type NumericLike = number | `${number}`
+
+export interface Addon { // native addon surface driven by ImgStableDiffusion
+  activate(): Promise<void> // awaited by _load() right after the addon is created
+  runJob(params: GenerationParams & { mode: 'txt2img' | 'img2img' }): Promise<boolean> // the wrapper treats `false` as "job not accepted" (busy)
+  cancel(): Promise<void> // called by ImgStableDiffusion.cancel() when present
+  unload(): Promise<void>
+}
+
+/** Supported diffusion sampling methods */
+export type SamplerMethod =
+  | 'euler'
+  | 'euler_a'
+  | 'heun'
+  | 'dpm2'
+  | 'dpm++2m'
+  | 'dpm++2mv2'
+  | 'dpm++2s_a'
+  | 'lcm'
+  | 'ipndm'
+  | 'ipndm_v'
+  | 'ddim_trailing'
+  | 'tcd'
+  | 'res_multistep'
+  | 'res_2s'
+
+/** Supported weight quantization types */
+export type WeightType =
+  | 'auto'
+  | 'f32'
+  | 'f16'
+  | 'bf16'
+  | 'q2_k'
+  | 'q3_k'
+  | 'q4_0'
+  | 'q4_1'
+  | 'q4_k'
+  | 'q5_0'
+  | 'q5_1'
+  | 'q5_k'
+  | 'q6_k'
+  | 'q8_0'
+
+/** Supported RNG types */
+export type RngType = 'cpu' | 'cuda' | 'std_default'
+
+/** Supported sampling schedules */
+export type ScheduleType =
+  | 'discrete'
+  | 'karras'
+  | 'exponential'
+  | 'ays'
+  | 'gits'
+  | 'sgm_uniform'
+  | 'simple'
+  | 'lcm'
+  | 'smoothstep'
+  | 'kl_optimal'
+  | 'bong_tangent'
+
+/** Supported noise prediction types */
+export type PredictionType = 'auto' | 'eps' | 'v' | 'edm_v' | 'flow' | 'flux_flow' | 'flux2_flow'
+
+/** LoRA application mode */
+export type LoraApplyMode = 'auto' | 'immediately' | 'at_runtime'
+
+/** Step-caching algorithm */
+export type CacheMode = 'disabled' | 'easycache' | 'ucache' | 'dbcache' | 'taylorseer' | 'cache-dit'
+
+export interface SdConfig {
+  /** Number of CPU threads (-1 = auto) */
+  threads?: NumericLike
+  /** Preferred compute device: 'gpu' (Metal/Vulkan) or 'cpu' */
+  device?: 'gpu' | 'cpu'
+  /** Weight quantization type */
+  wtype?: WeightType
+  /** RNG type for reproducible generation */
+  rng?: RngType
+  /** RNG type for the sampler (separate from context RNG) */
+  sampler_rng?: RngType
+  /** Sampling schedule */
+  schedule?: ScheduleType
+  /** Run CLIP encoder on CPU even when GPU is available */
+  clip_on_cpu?: boolean
+  /** Run VAE decoder on CPU even when GPU is available */
+  vae_on_cpu?: boolean
+  /** Enable VAE tiling to reduce VRAM usage */
+  vae_tiling?: boolean
+  /** Enable flash attention for memory efficiency */
+  flash_attn?: boolean
+  /** Enable flash attention for diffusion model specifically */
+  diffusion_fa?: boolean
+  /** Use memory-mapped model loading */
+  mmap?: boolean
+  /** Offload model weights to CPU when not in use */
+  offload_to_cpu?: boolean
+  /** Noise prediction type override (auto-detected from model by default) */
+  prediction?: PredictionType
+  /** Flow-matching guidance shift */
+  flow_shift?: number
+  /** Use direct convolution in diffusion model */
+  diffusion_conv_direct?: boolean
+  /** Use direct convolution in VAE */
+  vae_conv_direct?: boolean
+  /** Force SDXL VAE conv scale factor */
+  force_sdxl_vae_conv_scale?: boolean
+  /** Custom backends directory path (defaults to prebuilds/) */
+  backendsDir?: string
+  /** Custom tensor type rules string */
+  tensor_type_rules?: string
+  /** LoRA application mode */
+  lora_apply_mode?: LoraApplyMode
+  /** Logging verbosity: 0=error, 1=warn, 2=info, 3=debug */
+  verbosity?: NumericLike
+  [key: string]: string | number | boolean | undefined
+}
+
+export interface GenerationParams {
+  prompt: string
+  negative_prompt?: string
+  width?: number
+  height?: number
+  steps?: number
+  /** CFG scale (SD1/SD2/SDXL/SD3) */
+  cfg_scale?: number
+  /** Distilled guidance (FLUX.2) */
+  guidance?: number
+  /** Sampler name (e.g. 'euler', 'dpm++2m') */
+  sampling_method?: SamplerMethod
+  /** Scheduler name */
+  scheduler?: ScheduleType
+  seed?: number
+  batch_count?: number
+  /** Enable VAE tiling (for large images) */
+  vae_tiling?: boolean
+  /** VAE tile dimensions — integer or 'WxH' string (e.g. '512x512') */
+  vae_tile_size?: number | string
+  /** VAE tile overlap fraction (0.0–1.0) */
+  vae_tile_overlap?: number
+  /** Step-caching algorithm */
+  cache_mode?: CacheMode
+  /** Cache preset: slow/medium/fast/ultra (shorthand for cache_mode + threshold) */
+  cache_preset?: string
+  /** Direct cache reuse threshold override (0 = library default) */
+  cache_threshold?: number
+  /** Stochasticity parameter for DDIM/TCD samplers */
+  eta?: number
+  /** Image CFG scale for img2img/inpaint (-1 = use cfg_scale) */
+  img_cfg_scale?: number
+  /** Skip last N CLIP encoder layers (SD1.x/SD2.x) */
+  clip_skip?: number
+  /** Input image as PNG/JPEG bytes for img2img — not yet supported: the JS wrapper currently throws when this is set */
+  init_image?: Uint8Array
+  /** img2img denoising strength (0.0–1.0). 0 = keep source, 1 = ignore source */
+  strength?: number
+}
+
+/**
+ * Shape of the stats object emitted on the 'stats' event of a QvacResponse.
+ *
+ * All time values are in milliseconds. Cumulative fields (totalGenerationMs,
+ * totalWallMs, totalSteps, totalGenerations, totalImages, totalPixels) accumulate
+ * across the lifetime of the model instance; per-job fields (generationMs, width,
+ * height, seed) reflect only the most recent generation.
+ *
+ * Derivable rates (stepsPerSecond, msPerStep, megapixelsPerSecond) are intentionally
+ * omitted — callers can compute them from the primitives provided:
+ *   stepsPerSecond    = totalSteps  / (totalWallMs / 1000)
+ *   msPerStep         = totalWallMs / totalSteps
+ *   megapixelsPerSec  = (totalPixels / 1e6) / (totalWallMs / 1000)
+ */
+export interface RuntimeStats {
+  /** Wall time to load the model weights (ms) */
+  modelLoadMs: number
+  /** Wall time for the most recent generation job (ms) */
+  generationMs: number
+  /** Cumulative generation time across all jobs (ms) */
+  totalGenerationMs: number
+  /** Cumulative wall time across all jobs (ms) */
+  totalWallMs: number
+  /** Cumulative diffusion steps across all jobs */
+  totalSteps: number
+  /** Cumulative number of generation calls */
+  totalGenerations: number
+  /** Cumulative number of images produced */
+  totalImages: number
+  /** Cumulative number of pixels produced */
+  totalPixels: number
+  /** Width of the most recent generated image (px) */
+  width: number
+  /** Height of the most recent generated image (px) */
+  height: number
+  /** Seed used for the most recent generation */
+  seed: number
+}
+
+export interface ImgStableDiffusionArgs {
+  logger?: QvacLogger | Console | null
+  opts?: { stats?: boolean }
+  diskPath?: string // directory every model file name below is joined onto; the JS constructor defaults it to '.'
+  modelName: string // primary model file name, relative to diskPath
+  /** FLUX.1 / SD3: separate CLIP-L text encoder */
+  clipLModel?: string
+  /** SDXL / SD3: separate CLIP-G text encoder */
+  clipGModel?: string
+  /** FLUX.1 / SD3: separate T5-XXL text encoder */
+  t5XxlModel?: string
+  /** FLUX.2 [klein]: Qwen3 text encoder (llm_path); the bundled examples pair klein 4B with Qwen3-4B */
+  llmModel?: string
+  vaeModel?: string // optional VAE file name, relative to diskPath
+}
+
+export default class ImgStableDiffusion extends BaseInference {
+  protected addon: Addon
+  /** Records the file names and SD context config; the addon itself is created later by `_load()` */
+  constructor(args: ImgStableDiffusionArgs, config: SdConfig)
+  /** Builds the addon configuration, creates the addon and awaits its `activate()` */
+  _load(): Promise<void>
+  /** NOTE(review): inherited entry point, presumably delegating to `_load()` — confirm in BaseInference */
+  load(): Promise<void>
+  /** Starts a generation job; the returned response streams image bytes and JSON progress strings */
+  run(params: GenerationParams): Promise<QvacResponse>
+  /** Cancels any job, fails a still-pending response, unloads the addon and detaches the native logger */
+  unload(): Promise<void>
+  /** Forwards to the addon's `cancel()` when an addon exists */
+  cancel(): Promise<void>
+}
+
+export type { QvacResponse } // RuntimeStats is already exported at its declaration; listing it again conflicts (TS2484)
diff --git a/packages/lib-infer-diffusion/index.js b/packages/lib-infer-diffusion/index.js
new file mode 100644
index 0000000000..b33988a9fc
--- /dev/null
+++ b/packages/lib-infer-diffusion/index.js
@@ -0,0 +1,254 @@
+'use strict'
+
+const path = require('bare-path')
+
+const BaseInference = require('@qvac/infer-base/WeightsProvider/BaseInference')
+const { SdInterface } = require('./addon')
+
+const LOG_METHODS = ['error', 'warn', 'info', 'debug']
+
+const RUN_BUSY_ERROR_MESSAGE = 'Cannot set new job: a job is already set or being processed'
+
+/**
+ * Text-to-image and image-to-image generation using stable-diffusion.cpp.
+ * Supports SD1.x, SD2.x, SDXL, SD3, and FLUX.2 [klein].
+ */
+class ImgStableDiffusion extends BaseInference {
+  /**
+   * @param {object} args
+   * @param {object} [args.logger] - Structured logger
+   * @param {object} [args.opts] - Optional inference options
+   * @param {string} [args.diskPath='.'] - Local directory containing model weight files
+   * @param {string} args.modelName - Model file name (e.g. 'flux1-dev-q4_0.gguf')
+   * @param {string} [args.clipLModel] - Optional CLIP-L model file name (FLUX.1 / SD3)
+   * @param {string} [args.clipGModel] - Optional CLIP-G model file name (SDXL / SD3)
+   * @param {string} [args.t5XxlModel] - Optional T5-XXL text encoder file name (FLUX.1 / SD3)
+   * @param {string} [args.llmModel] - Optional LLM text encoder file name (FLUX.2 klein → Qwen3; the examples pair klein 4B with Qwen3-4B)
+   * @param {string} [args.vaeModel] - Optional VAE file name
+   * @param {object} config - SD context configuration (threads, device, wtype, etc.)
+   */
+  constructor (
+    {
+      opts = {},
+      logger = null,
+      diskPath = '.',
+      modelName,
+      clipLModel,
+      clipGModel,
+      t5XxlModel,
+      llmModel,
+      vaeModel
+    },
+    config
+  ) {
+    super({ logger, opts })
+    this._config = config // passed to the addon unchanged as `config` by _load()
+    this._diskPath = diskPath
+    this._modelName = modelName
+    this._clipLModel = clipLModel || null // omitted optional file names are normalized to null
+    this._clipGModel = clipGModel || null
+    this._t5XxlModel = t5XxlModel || null
+    this._llmModel = llmModel || null
+    this._vaeModel = vaeModel || null
+    this._hasActiveResponse = false // true while a started job's response is unsettled; _runInternal rejects new jobs meanwhile
+  }
+
+  async _load () {
+    this.logger.info('Starting stable-diffusion model load')
+
+    try {
+      // Optional weight files are resolved against diskPath; an empty string
+      // stands for "not provided".
+      const resolveOptional = (fileName) => fileName ? path.join(this._diskPath, fileName) : ''
+      const primaryPath = path.join(this._diskPath, this._modelName)
+
+      // stable-diffusion.cpp accepts the primary weights through one of two params:
+      //   path               → model_path: all-in-one checkpoints that embed their
+      //                        own text encoders (SD1.x, SD2.x, SDXL, SD3 all-in-one).
+      //   diffusionModelPath → diffusion_model_path: diffusion-only weights that
+      //                        need separate encoders (FLUX.2 [klein] with llmModel,
+      //                        split SD3 with t5XxlModel + clipLModel + clipGModel).
+      // Heuristic: a separate LLM or T5-XXL encoder means the caller is using the
+      // split layout, so the primary file is routed to diffusionModelPath.
+      const usesSeparateEncoder = Boolean(this._llmModel) || Boolean(this._t5XxlModel)
+
+      const addonParams = {
+        path: usesSeparateEncoder ? '' : primaryPath,
+        diffusionModelPath: usesSeparateEncoder ? primaryPath : '',
+        clipLPath: resolveOptional(this._clipLModel),
+        clipGPath: resolveOptional(this._clipGModel),
+        t5XxlPath: resolveOptional(this._t5XxlModel),
+        llmPath: resolveOptional(this._llmModel),
+        vaePath: resolveOptional(this._vaeModel),
+        config: this._config
+      }
+
+      this.logger.info('Creating stable-diffusion addon with configuration:', addonParams)
+      this.addon = this._createAddon(addonParams)
+
+      this.logger.info('Activating stable-diffusion addon')
+      await this.addon.activate()
+
+      this.logger.info('Stable-diffusion model load completed successfully')
+    } catch (loadError) {
+      this.logger.error('Error during stable-diffusion model load:', loadError)
+      throw loadError
+    }
+  }
+
+  /**
+   * Load the native binding, hook up its logger, and wrap it in an SdInterface.
+   * @param {object} configurationParams - Paths and config assembled by _load()
+   * @returns {SdInterface}
+   */
+  _createAddon (configurationParams) {
+    const nativeBinding = require('./binding')
+    this._binding = nativeBinding
+    this._connectNativeLogger()
+    // Bound so addon events are delivered to this instance's _addonOutputCallback.
+    const onOutput = this._addonOutputCallback.bind(this)
+    return new SdInterface(nativeBinding, configurationParams, onOutput)
+  }
+
+  // Route native (C++) log lines into this.logger; a failed hookup only produces a warning.
+  _connectNativeLogger () {
+    if (!(this._binding && this.logger)) return
+    const forward = (priority, message) => {
+      const level = LOG_METHODS[priority] || 'info' // unknown priorities are logged at 'info'
+      if (typeof this.logger[level] === 'function') this.logger[level](`[C++] ${message}`)
+    }
+    try {
+      this._binding.setLogger(forward)
+      this._nativeLoggerActive = true
+    } catch (hookError) {
+      this.logger.warn('Failed to connect native logger:', hookError.message)
+    }
+  }
+
+  // Detach the native logger if this instance attached one; errors from the release call are ignored.
+  _releaseNativeLogger () {
+    const attached = Boolean(this._nativeLoggerActive) && Boolean(this._binding)
+    if (!attached) return
+    try { this._binding.releaseLogger() } catch (_) {}
+    this._nativeLoggerActive = false
+  }
+
+  // Translate a raw addon event into the base-class output callback for the single job slot.
+  _addonOutputCallback (addon, event, data, error) {
+    const jobId = 'OnlyOneJob'
+    let kind = event
+    let forwardedError = error
+
+    if (event.includes('Error')) {
+      kind = 'Error'
+    } else if (data instanceof Uint8Array || typeof data === 'string') {
+      kind = 'Output' // image bytes or a JSON progress string
+    } else if (data !== null && typeof data === 'object') {
+      // Any other object payload is taken to be the end-of-job RuntimeStats.
+      kind = 'JobEnded'
+      forwardedError = null
+    }
+    return this._outputCallback(addon, kind, jobId, data, forwardedError)
+  }
+
+  /**
+   * Ask the addon to cancel the current job; a no-op when there is no addon or it has no cancel().
+   */
+  async cancel () {
+    const activeAddon = this.addon
+    if (!activeAddon || !activeAddon.cancel) return
+    await activeAddon.cancel()
+  }
+
+  /**
+   * Cancel any job, fail a still-pending response, unload the addon, and detach the native logger.
+   */
+  async unload () {
+    const jobId = 'OnlyOneJob'
+    const teardown = async () => {
+      await this.cancel()
+      const pending = this._jobToResponse.get(jobId)
+      if (pending) {
+        pending.failed(new Error('Model was unloaded'))
+        this._deleteJobMapping(jobId)
+      }
+      this._hasActiveResponse = false
+      if (this.addon) await super.unload()
+      this._releaseNativeLogger()
+    }
+    return await this._withExclusiveRun(teardown)
+  }
+
+  /**
+   * Generate an image from a text prompt (txt2img).
+   *
+   * Only txt2img is implemented at the moment:
+   *   - If `params.init_image` is provided → throws (img2img is not yet supported)
+   *   - Otherwise → txt2img
+   *
+   * Returns a QvacResponse that streams two types of updates:
+   *   - Uint8Array  — PNG-encoded output image (one per batch_count)
+   *   - string      — JSON step-progress tick: {"step":N,"total":M,"elapsed_ms":T}
+   *
+   * @param {object} params
+   * @param {string} params.prompt                  - Text prompt
+   * @param {string} [params.negative_prompt]       - Negative prompt
+   * @param {number} [params.steps=20]              - Denoising step count
+   * @param {number} [params.width=512]             - Output width (multiple of 8)
+   * @param {number} [params.height=512]            - Output height (multiple of 8)
+   * @param {number} [params.guidance=3.5]          - Distilled guidance (FLUX.2)
+   * @param {number} [params.cfg_scale=7.0]         - CFG scale (SD1/SD2)
+   * @param {string} [params.sampling_method]       - Sampler name
+   * @param {string} [params.scheduler]             - Scheduler name
+   * @param {number} [params.seed=-1]               - RNG seed; -1 = random
+   * @param {number} [params.batch_count=1]         - Images per call
+   * @param {boolean} [params.vae_tiling=false]     - Enable VAE tiling (for large images)
+   * @param {string}  [params.cache_preset]         - Cache preset: slow/medium/fast/ultra
+   * @param {Uint8Array} [params.init_image]        - Reserved for img2img (PNG/JPEG bytes); currently rejected with an Error
+   * @param {number}    [params.strength=0.75]      - img2img: 0 = keep source, 1 = ignore source
+   * @returns {Promise<QvacResponse>} Rejects when a job is already active or the addon does not accept the job
+   */
+  async _runInternal (params) {
+    if (params.init_image) {
+      throw new Error('img2img is not yet supported — omit init_image to run txt2img')
+    }
+
+    const mode = 'txt2img'
+    this.logger.info('Starting generation with mode:', mode)
+
+    return await this._withExclusiveRun(async () => {
+      if (this._hasActiveResponse) {
+        throw new Error(RUN_BUSY_ERROR_MESSAGE)
+      }
+
+      const response = this._createResponse('OnlyOneJob')
+
+      let accepted
+      try {
+        accepted = await this.addon.runJob({ ...params, mode })
+      } catch (error) {
+        this._deleteJobMapping('OnlyOneJob') // drop the mapping so the failed start leaves no job registered
+        response.failed(error)
+        throw error
+      }
+
+      if (!accepted) { // runJob resolved falsy: handled the same way as the busy case above
+        this._deleteJobMapping('OnlyOneJob')
+        const msg = RUN_BUSY_ERROR_MESSAGE
+        response.failed(new Error(msg))
+        throw new Error(msg)
+      }
+
+      this._hasActiveResponse = true
+      const finalized = response.await().finally(() => { this._hasActiveResponse = false }) // clear the busy flag however the job settles
+      finalized.catch(() => {}) // marks this derived promise as handled; callers still see the rejection through await()
+      response.await = () => finalized // every later await() shares the promise that clears the flag
+
+      this.logger.info('Generation job started successfully')
+
+      return response
+    })
+  }
+}
+
+module.exports = ImgStableDiffusion
diff --git a/packages/lib-infer-diffusion/package.json b/packages/lib-infer-diffusion/package.json
new file mode 100644
index 0000000000..48c9fd7568
--- /dev/null
+++ b/packages/lib-infer-diffusion/package.json
@@ -0,0 +1,98 @@
+{
+  "name": "@qvac/diffusion-cpp",
+  "version": "0.1.0",
+  "description": "stable-diffusion.cpp addon for qvac image/video generation",
+  "addon": true,
+  "scripts": {
+    "build": "bare-make generate && bare-make build && bare-make install",
+    "build:vulkan": "bare-make generate && bare-make build && bare-make install",
+    "build:cuda": "bare-make generate -D SD_CUDA=ON && bare-make build && bare-make install",
+    "example": "bare examples/load-model.js",
+    "generate": "bare examples/generate-image.js",
+    "generate:sd2": "bare examples/generate-image-sd2.js",
+    "generate:sdxl": "bare examples/generate-image-sdxl.js",
+    "generate:sd3": "bare examples/generate-image-sd3.js",
+    "build:pack": "mkdir -p dist && npm pack --pack-destination dist",
+    "mobile:copy-prebuilds": "cp -r prebuilds/android-arm64 prebuilds/android-ia32 || echo 'Warning: Failed to copy sd prebuilds to android-ia32'; cp -r prebuilds/android-arm64 prebuilds/android-arm || echo 'Warning: Failed to copy sd prebuilds to android-arm'; cp -r prebuilds/android-arm64 prebuilds/android-x64 || echo 'Warning: Failed to copy sd prebuilds to android-x64'; cp -r prebuilds/ios-arm64 prebuilds/ios-arm64-simulator 2>/dev/null || echo 'iOS prebuilds already present'; cp -r prebuilds/ios-arm64 prebuilds/ios-x64-simulator 2>/dev/null || echo 'iOS prebuilds already present'",
+    "lint": "standard --ignore \"addon/**\"",
+    "lint:fix": "standard --ignore \"addon/**\" --fix",
+    "lint-cpp": "clang-tidy -p build $(find addon -name '*.cpp')",
+    "test": "npm run test:integration",
+    "test:integration": "npm run test:integration:generate && bare test/integration/all.js --exit",
+    "test:integration:generate": "brittle -r test/integration/all.js test/integration/*.test.js && npm run test:mobile:generate",
+    "test:mobile:generate": "bare ./scripts/generate-mobile-integration-tests.js",
+    "test:mobile:validate": "node scripts/validate-mobile-tests.js",
+    "test:dts": "tsc -p tsconfig.dts.json",
+    "test:cpp:build": "bare-make generate -D BUILD_TESTING=ON && bare-make build --target addon-test",
+    "test:cpp:run": "cd build/test/unit/ && ./addon-test --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:run:unit": "cd build/test/unit/ && ./addon-test --gtest_filter='SdModelTest.*:SdBackendSelectionTest.*' --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:run:loading": "cd build/test/unit/ && ./addon-test --gtest_filter='SdModelLoadingTest.*' --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:run:inference": "cd build/test/unit/ && ./addon-test --gtest_filter='SdSingleStepInferenceTest.*' --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:run:generation": "cd build/test/unit/ && ./addon-test --gtest_filter='SdFullGenerationTest.*' --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:run:model": "cd build/test/unit/ && ./addon-test --gtest_filter='SdModelLoadingTest.*:SdSingleStepInferenceTest.*:SdFullGenerationTest.*' --gtest_output=xml:cpp-test-results.xml",
+    "test:cpp:list": "cd build/test/unit/ && ./addon-test --gtest_list_tests",
+    "test:cpp": "npm run test:cpp:build && npm run test:cpp:run",
+    "coverage:cpp:build": "bare-make generate -D BUILD_TESTING=ON -D ENABLE_COVERAGE=ON && bare-make build --target addon-test",
+    "coverage:cpp:run": "cd build/test/unit/ && LLVM_PROFILE_FILE=default.profraw ./addon-test --gtest_output=xml:cpp-test-results.xml",
+    "coverage:cpp:summary": "cd build/test/unit && llvm-cov-19 report ./addon-test --instr-profile=coverage.profdata -ignore-filename-regex='(tests|build|node_modules|gtest|gmock|\\.vcpkg|/usr)/' > coverage-summary.txt",
+    "coverage:cpp:report": "cd build/test/unit/ && ls -lha && llvm-profdata-19 merge -sparse default.profraw -o coverage.profdata && llvm-cov-19 show ./addon-test -instr-profile=coverage.profdata -format=html -output-dir=coverage-html -ignore-filename-regex='(tests|build|node_modules|gtest|gmock|\\.vcpkg|/usr)/' && llvm-cov-19 export ./addon-test -instr-profile=coverage.profdata -format=lcov -ignore-filename-regex='(tests|build|node_modules|gtest|gmock|\\.vcpkg|/usr)/' > lcov.info && npm run coverage:cpp:summary",
+    "coverage:cpp": "npm run coverage:cpp:build && npm run coverage:cpp:run && npm run coverage:cpp:report",
+    "test:all": "npm run test && npm run test:cpp"
+  },
+  "files": [
+    "binding.js",
+    "index.js",
+    "addon.js",
+    "addonLogging.js",
+    "addonLogging.d.ts",
+    "prebuilds",
+    "index.d.ts",
+    "LICENSE",
+    "NOTICE"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp.git"
+  },
+  "author": "Tether",
+  "license": "Apache-2.0",
+  "bugs": "https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp/issues",
+  "homepage": "https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp#readme",
+  "devDependencies": {
+    "@types/node": "^24.2.1",
+    "bare-buffer": "^3.4.2",
+    "bare-fs": "^4.5.1",
+    "bare-os": "^3.7.1",
+    "bare-subprocess": "^5.2.2",
+    "bare-url": "^2.1.6",
+    "brittle": "^3.19.1",
+    "cmake-bare": "1.7.5",
+    "cmake-vcpkg": "^1.1.0",
+    "standard": "^17.0.0",
+    "typescript": "^5.9.2"
+  },
+  "dependencies": {
+    "@qvac/infer-base": "^0.2.2",
+    "bare-path": "^3.0.0",
+    "bare-process": "^4.2.2"
+  },
+  "engines": {
+    "bare": ">=1.24.0"
+  },
+  "peerDependencies": {},
+  "exports": {
+    "./package": "./package.json",
+    ".": {
+      "types": "./index.d.ts",
+      "default": "./index.js"
+    },
+    "./addonLogging": {
+      "types": "./addonLogging.d.ts",
+      "default": "./addonLogging.js"
+    },
+    "./addonLogging.js": "./addonLogging.js",
+    "./addon.js": "./addon.js",
+    "./binding.js": "./binding.js"
+  },
+  "types": "index.d.ts"
+}
diff --git a/packages/lib-infer-diffusion/scripts/download-model-sd2.sh b/packages/lib-infer-diffusion/scripts/download-model-sd2.sh
new file mode 100755
index 0000000000..0771b80267
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/download-model-sd2.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Stable Diffusion 2.1 — GGUF Q8_0 (2.32 GB, no authentication required).
+#
+# Source: gpustack/stable-diffusion-v2-1-GGUF (public, no login needed)
+# Converted from stabilityai/stable-diffusion-2-1 using stable-diffusion.cpp.
+#
+# All-in-one file: no separate text encoder or VAE needed.
+# Disk: ~2.32 GB    RAM: ~3.5 GB at runtime
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT="$(cd "$SCRIPT_DIR/.." && pwd)/models"
+HF="https://huggingface.co"
+
+mkdir -p "$OUT"
+
+dl() {  # dl URL DEST — download URL to DEST; skipped when DEST already exists
+  local url="$1" dest="$2" part="$2.part"  # stage in DEST.part so an interrupted run never leaves a truncated DEST
+  [[ -f "$dest" ]] && echo "exists: $(basename "$dest")" && return
+  echo "downloading: $(basename "$dest")"
+  curl -fL --progress-bar --retry 5 --retry-delay 3 --retry-connrefused -C - -o "$part" "$url" \
+    && mv -f "$part" "$dest" || { rm -f "$part"; exit 1; }  # publish only after a complete transfer
+}
+
+dl "$HF/gpustack/stable-diffusion-v2-1-GGUF/resolve/main/stable-diffusion-v2-1-Q8_0.gguf" \
+   "$OUT/stable-diffusion-v2-1-Q8_0.gguf"
+
+echo "done → $OUT"
diff --git a/packages/lib-infer-diffusion/scripts/download-model-sd3.sh b/packages/lib-infer-diffusion/scripts/download-model-sd3.sh
new file mode 100755
index 0000000000..be9f583883
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/download-model-sd3.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Stable Diffusion 3 Medium — official safetensors (ungated mirror).
+#
+# Source: adamo1139/stable-diffusion-3-medium-ungated
+#         Ungated re-upload of the official stabilityai/stable-diffusion-3-medium
+#         weights.  No HuggingFace account or token required.
+#
+# File downloaded:
+#   sd3_medium_incl_clips.safetensors    5.97 GB
+#     All-in-one: diffusion model + CLIP-L + CLIP-G text encoders.
+#     No T5-XXL — text-following quality is slightly lower but RAM usage is
+#     comfortable on 16 GB unified memory.
+#
+# Optional — better quality with T5-XXL (adds ~4.9 GB download + ~5 GB RAM):
+#   Uncomment the t5xxl download block below and use generate-image-sd3-split.js.
+#
+# Disk: ~6.0 GB    RAM: ~7–8 GB at runtime (without T5-XXL)
+# Minimum recommended: 12 GB unified memory
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT="$(cd "$SCRIPT_DIR/.." && pwd)/models"
+HF="https://huggingface.co"
+REPO="adamo1139/stable-diffusion-3-medium-ungated"
+
+mkdir -p "$OUT"
+
+dl() {  # dl URL DEST — download URL to DEST; skipped when DEST already exists
+  local url="$1" dest="$2" part="$2.part"  # stage in DEST.part so an interrupted run never leaves a truncated DEST
+  [[ -f "$dest" ]] && echo "exists: $(basename "$dest")" && return
+  echo "downloading: $(basename "$dest")"
+  curl -fL --progress-bar --retry 5 --retry-delay 3 --retry-connrefused -C - -o "$part" "$url" \
+    && mv -f "$part" "$dest" || { rm -f "$part"; exit 1; }  # publish only after a complete transfer
+}
+
+# All-in-one: diffusion model + CLIP-L + CLIP-G (no T5-XXL)
+dl "$HF/$REPO/resolve/main/sd3_medium_incl_clips.safetensors" \
+   "$OUT/sd3_medium_incl_clips.safetensors"
+
+# Optional: T5-XXL FP8 for much better prompt understanding (~4.89 GB)
+# Uncomment to download:
+# dl "$HF/$REPO/resolve/main/text_encoders/t5xxl_fp8_e4m3fn.safetensors" \
+#    "$OUT/t5xxl_fp8_e4m3fn.safetensors"
+
+echo "done → $OUT"
diff --git a/packages/lib-infer-diffusion/scripts/download-model-sdxl-q4.sh b/packages/lib-infer-diffusion/scripts/download-model-sdxl-q4.sh
new file mode 100755
index 0000000000..2de6155781
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/download-model-sdxl-q4.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Stable Diffusion XL Base 1.0 — GGUF Q4_0 (3.94 GB, no authentication required).
+#
+# Source: gpustack/stable-diffusion-xl-base-1.0-GGUF (public, no login needed)
+#
+# Available Q4 variants in this repo:
+#   Q4_0  — 3.94 GB  (this script)
+#   Q4_1  — 4.08 GB  (slightly better quality; swap the filename below to use it)
+#
+# All-in-one file: CLIP-L, CLIP-G, UNet, and VAE are all baked in.
+# No separate text encoder or VAE needed.
+#
+# Disk: ~3.94 GB    RAM: ~4.5 GB at runtime
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT="$(cd "$SCRIPT_DIR/.." && pwd)/models"
+HF="https://huggingface.co"
+
+mkdir -p "$OUT"
+
+dl() {  # dl URL DEST — download URL to DEST; skipped when DEST already exists
+  local url="$1" dest="$2" part="$2.part"  # stage in DEST.part so an interrupted run never leaves a truncated DEST
+  [[ -f "$dest" ]] && echo "exists: $(basename "$dest")" && return
+  echo "downloading: $(basename "$dest")"
+  curl -fL --progress-bar --retry 5 --retry-delay 3 --retry-connrefused -C - -o "$part" "$url" \
+    && mv -f "$part" "$dest" || { rm -f "$part"; exit 1; }  # publish only after a complete transfer
+}
+
+dl "$HF/gpustack/stable-diffusion-xl-base-1.0-GGUF/resolve/main/stable-diffusion-xl-base-1.0-Q4_0.gguf" \
+   "$OUT/stable-diffusion-xl-base-1.0-Q4_0.gguf"
+
+echo "done → $OUT"
diff --git a/packages/lib-infer-diffusion/scripts/download-model-sdxl.sh b/packages/lib-infer-diffusion/scripts/download-model-sdxl.sh
new file mode 100755
index 0000000000..8698f462a5
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/download-model-sdxl.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Stable Diffusion XL Base 1.0 — GGUF Q8_0 (4.27 GB, no authentication required).
+#
+# Source: gpustack/stable-diffusion-xl-base-1.0-GGUF (public, no login needed)
+# Converted from stabilityai/stable-diffusion-xl-base-1.0 using stable-diffusion.cpp.
+#
+# All-in-one file: CLIP-L, CLIP-G, UNet, and VAE are all baked in.
+# No separate text encoder or VAE needed.
+#
+# Disk: ~4.27 GB    RAM: ~5.5 GB at runtime
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT="$(cd "$SCRIPT_DIR/.." && pwd)/models"
+HF="https://huggingface.co"
+
+mkdir -p "$OUT"
+
+dl() {  # dl URL DEST — download URL to DEST; skipped when DEST already exists
+  local url="$1" dest="$2" part="$2.part"  # stage in DEST.part so an interrupted run never leaves a truncated DEST
+  [[ -f "$dest" ]] && echo "exists: $(basename "$dest")" && return
+  echo "downloading: $(basename "$dest")"
+  curl -fL --progress-bar --retry 5 --retry-delay 3 --retry-connrefused -C - -o "$part" "$url" \
+    && mv -f "$part" "$dest" || { rm -f "$part"; exit 1; }  # publish only after a complete transfer
+}
+
+dl "$HF/gpustack/stable-diffusion-xl-base-1.0-GGUF/resolve/main/stable-diffusion-xl-base-1.0-Q8_0.gguf" \
+   "$OUT/stable-diffusion-xl-base-1.0-Q8_0.gguf"
+
+echo "done → $OUT"
diff --git a/packages/lib-infer-diffusion/scripts/download-model.sh b/packages/lib-infer-diffusion/scripts/download-model.sh
new file mode 100755
index 0000000000..dd56d636e0
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/download-model.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUT="$(cd "$SCRIPT_DIR/.." && pwd)/models"
+HF="https://huggingface.co"
+
+mkdir -p "$OUT"
+
+dl() {  # dl URL DEST — download URL to DEST; skipped when DEST already exists
+  local url="$1" dest="$2" part="$2.part"  # stage in DEST.part so an interrupted run never leaves a truncated DEST
+  [[ -f "$dest" ]] && echo "exists: $(basename "$dest")" && return
+  echo "downloading: $(basename "$dest")"
+  # -C - resumes a partial "$part" left by an interrupted run; --retry retries on transient errors
+  curl -fL --progress-bar --retry 5 --retry-delay 3 --retry-connrefused -C - -o "$part" "$url" \
+    && mv -f "$part" "$dest" || { rm -f "$part"; exit 1; }  # publish only after a complete transfer
+}
+
+dl "$HF/leejet/FLUX.2-klein-4B-GGUF/resolve/main/flux-2-klein-4b-Q8_0.gguf"        "$OUT/flux-2-klein-4b-Q8_0.gguf"
+# Qwen3-4B Q4_K_M GGUF text encoder — fp4 safetensors is NOT supported by ggml
+dl "$HF/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf"                  "$OUT/Qwen3-4B-Q4_K_M.gguf"
+dl "$HF/black-forest-labs/FLUX.2-klein-4B/resolve/main/vae/diffusion_pytorch_model.safetensors" "$OUT/flux2-vae.safetensors"
+
+echo "done → $OUT"
diff --git a/packages/lib-infer-diffusion/scripts/generate-mobile-integration-tests.js b/packages/lib-infer-diffusion/scripts/generate-mobile-integration-tests.js
new file mode 100644
index 0000000000..f72b2306de
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/generate-mobile-integration-tests.js
@@ -0,0 +1,68 @@
+'use strict'
+
+const fs = require('bare-fs')
+const path = require('bare-path')
+
+const repoRoot = path.resolve(__dirname, '..')
+const integrationDir = path.join(repoRoot, 'test', 'integration')
+const mobileDir = path.join(repoRoot, 'test', 'mobile')
+const outputFile = path.join(mobileDir, 'integration.auto.cjs')
+
+function getIntegrationFiles () {
+  if (!fs.existsSync(integrationDir)) {
+    throw new Error(`Integration directory not found: ${integrationDir}`)
+  }
+
+  return fs.readdirSync(integrationDir)
+    .filter(entry => entry.endsWith('.test.js'))
+    .sort()
+}
+
+function toFunctionName (fileName) {
+  const base = fileName.replace(/\.js$/, '')
+  const parts = base.split(/[^a-zA-Z0-9]+/).filter(Boolean)
+  const suffix = parts.map(part => part.charAt(0).toUpperCase() + part.slice(1)).join('')
+  return `run${suffix}`
+}
+
+function buildFileContents (files) {
+  const lines = []
+  lines.push("'use strict'")
+  lines.push("require('./integration-runtime.cjs')")
+  lines.push('')
+  lines.push('// AUTO-GENERATED FILE. Run `npm run test:mobile:generate` to update.')
+  lines.push('// Each function mirrors a single file under test/integration/.')
+  lines.push('')
+  lines.push('/* global runIntegrationModule */')
+  lines.push('')
+
+  for (let i = 0; i < files.length; i++) {
+    const file = files[i]
+    const fnName = toFunctionName(file)
+    const relativePath = `../integration/${file}`
+    lines.push(`async function ${fnName} (options = {}) { // eslint-disable-line no-unused-vars`)
+    lines.push(`  return runIntegrationModule('${relativePath}', options)`)
+    lines.push('}')
+    // Only add blank line between functions, not after the last one
+    if (i < files.length - 1) {
+      lines.push('')
+    }
+  }
+
+  return `${lines.join('\n')}\n`
+}
+
+function main () {
+  const files = getIntegrationFiles()
+  if (files.length === 0) {
+    throw new Error(`No integration test files found inside ${integrationDir}`)
+  }
+
+  const content = buildFileContents(files)
+  fs.writeFileSync(outputFile, content, 'utf8')
+  console.log(`Generated ${outputFile} with ${files.length} integration runners.`)
+}
+
+if (require.main === module) {
+  main()
+}
diff --git a/packages/lib-infer-diffusion/scripts/validate-mobile-tests.js b/packages/lib-infer-diffusion/scripts/validate-mobile-tests.js
new file mode 100644
index 0000000000..d455da0731
--- /dev/null
+++ b/packages/lib-infer-diffusion/scripts/validate-mobile-tests.js
@@ -0,0 +1,94 @@
+#!/usr/bin/env node
+'use strict'
+
+const fs = require('fs')
+const path = require('path')
+
+const repoRoot = path.resolve(__dirname, '..')
+const integrationDir = path.join(repoRoot, 'test', 'integration')
+const mobileAutoFile = path.join(repoRoot, 'test', 'mobile', 'integration.auto.cjs')
+
+function getIntegrationTestFiles () {
+  if (!fs.existsSync(integrationDir)) {
+    throw new Error(`Integration directory not found: ${integrationDir}`)
+  }
+
+  return fs.readdirSync(integrationDir)
+    .filter(f => f.endsWith('.test.js'))
+    .sort()
+}
+
+function getGeneratedIntegrationRefs (content) {
+  const references = new Set()
+  const referencePattern = /runIntegrationModule\('\.\.\/integration\/([^']+)'(?:,\s*options)?\)/g
+  let match = referencePattern.exec(content)
+
+  while (match !== null) {
+    references.add(match[1])
+    match = referencePattern.exec(content)
+  }
+
+  return references
+}
+
+function setDiff (left, right) {
+  const rightSet = right instanceof Set ? right : new Set(right)
+  return [...left].filter(item => !rightSet.has(item)).sort((a, b) => a - b)
+}
+
+function printMismatchDetails (label, items) {
+  console.error(`   ${label}:`)
+  items.forEach(item => console.error(`     - ${item}`))
+}
+
+try { // exits 0 when the generated mobile runner matches test/integration, 1 on any mismatch or error
+  const integrationFiles = getIntegrationTestFiles()
+  if (!fs.existsSync(mobileAutoFile)) {
+    console.error('❌ Mobile integration tests not generated!')
+    console.error('   Run: npm run test:mobile:generate')
+    process.exit(1)
+  }
+  // Compare the test files on disk with the files the generated runner references.
+  const expectedSet = new Set(integrationFiles)
+  const mobileAutoContent = fs.readFileSync(mobileAutoFile, 'utf8')
+  const generatedSet = getGeneratedIntegrationRefs(mobileAutoContent)
+
+  const missingFromGenerated = setDiff(expectedSet, generatedSet)
+  const staleInGenerated = setDiff(generatedSet, expectedSet)
+
+  if (missingFromGenerated.length > 0 || staleInGenerated.length > 0) {
+    console.error('❌ Mobile integration tests are out of sync with test/integration')
+    if (missingFromGenerated.length > 0) {
+      printMismatchDetails('Missing from integration.auto.cjs', missingFromGenerated)
+    }
+    if (staleInGenerated.length > 0) {
+      printMismatchDetails('Stale references in integration.auto.cjs', staleInGenerated)
+    }
+    console.error('   Run: npm run test:mobile:generate')
+    process.exit(1)
+  }
+  // With no integration tests there is no timestamp to compare, so finish here.
+  if (integrationFiles.length === 0) {
+    console.log('✅ Mobile integration tests are up to date (no integration tests found)')
+    process.exit(0)
+  }
+
+  // Keep timestamp validation as a fast stale-content signal for edited tests.
+  const latestIntegrationTime = Math.max(
+    ...integrationFiles.map(f => fs.statSync(path.join(integrationDir, f)).mtimeMs)
+  )
+  const mobileAutoTime = fs.statSync(mobileAutoFile).mtimeMs
+  // NOTE(review): mtimes come from the local filesystem; confirm a fresh checkout preserves the expected order.
+  if (latestIntegrationTime > mobileAutoTime) {
+    console.error('❌ Mobile integration tests are out of date!')
+    console.error('   Integration tests modified after mobile tests were generated.')
+    console.error('   Run: npm run test:mobile:generate')
+    process.exit(1)
+  }
+
+  console.log('✅ Mobile integration tests are up to date')
+  process.exit(0)
+} catch (error) {
+  console.error('Error validating mobile tests:', error.message)
+  process.exit(1)
+}
diff --git a/packages/lib-infer-diffusion/test/integration/api-behavior.test.js b/packages/lib-infer-diffusion/test/integration/api-behavior.test.js
new file mode 100644
index 0000000000..e541bc18b8
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/api-behavior.test.js
@@ -0,0 +1,190 @@
+'use strict'
+
+const test = require('brittle')
+const os = require('bare-os')
+const proc = require('bare-process')
+const binding = require('../../binding')
+const ImgStableDiffusion = require('../../index')
+const {
+  ensureModel,
+  setupJsLogger
+} = require('./utils')
+
+const isDarwinX64 = os.platform() === 'darwin' && os.arch() === 'x64'
+const isLinuxArm64 = os.platform() === 'linux' && os.arch() === 'arm64'
+const isAndroid = os.platform() === 'android'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+
+// Smallest model for fast behavior tests
+const MODEL = {
+  name: 'stable-diffusion-v2-1-Q8_0.gguf',
+  url: 'https://huggingface.co/gpustack/stable-diffusion-v2-1-GGUF/resolve/main/stable-diffusion-v2-1-Q8_0.gguf'
+}
+
+// Many steps so cancel has time to fire before completion
+const LONG_PARAMS = {
+  prompt: 'a red fox in a snowy forest',
+  steps: 50,
+  width: 256,
+  height: 256,
+  cfg_scale: 7.5,
+  seed: 42
+}
+
+const SHORT_PARAMS = {
+  prompt: 'a red fox',
+  steps: 2,
+  width: 256,
+  height: 256,
+  cfg_scale: 7.5,
+  seed: 1
+}
+
+async function setupModel (t) {
+  setupJsLogger(binding)
+
+  const [modelName, modelDir] = await ensureModel({
+    modelName: MODEL.name,
+    downloadUrl: MODEL.url
+  })
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: modelDir,
+      modelName
+    },
+    {
+      device: useCpu ? 'cpu' : 'gpu',
+      vae_on_cpu: isAndroid,
+      threads: 4,
+      prediction: 'v',
+      verbosity: '2'
+    }
+  )
+
+  await model.load()
+
+  t.teardown(async () => {
+    await model.unload().catch(() => {})
+    try { binding.releaseLogger() } catch (_) {}
+  })
+
+  return { model }
+}
+
+test('idle | run: allowed, returns QvacResponse', { timeout: 600000 }, async t => {
+  const { model } = await setupModel(t)
+  const response = await model.run(SHORT_PARAMS)
+  t.ok(response, 'run() returns a response')
+  t.ok(typeof response.onUpdate === 'function', 'response has onUpdate')
+  t.ok(typeof response.await === 'function', 'response has await')
+
+  const images = []
+  await response.onUpdate(data => {
+    if (data instanceof Uint8Array) images.push(data)
+  }).await()
+
+  t.ok(images.length > 0, 'run produces at least one image')
+})
+
+test('idle | cancel: allowed, no-op', { timeout: 600000 }, async t => {
+  const { model } = await setupModel(t)
+  await model.cancel() // no run() has been issued on this freshly loaded model
+  t.pass('cancel when idle does not throw')
+})
+
+test('run | cancel: cancels current job', { timeout: 600000 }, async t => {
+  const { model } = await setupModel(t)
+  const response = await model.run(LONG_PARAMS)
+
+  // Cancel inside onUpdate after first progress tick — ensures native generation
+  // is actually active (matches LLM addon's runAndCancelAfterFirstToken pattern)
+  let cancelFired = false
+  const chain = response.onUpdate(async data => {
+    if (cancelFired) return
+    if (typeof data === 'string') {
+      cancelFired = true
+      await model.cancel()
+    }
+  })
+
+  try {
+    await chain.await()
+  } catch (err) {
+    if (!/cancel|aborted|stopp?ed/i.test(err?.message || '')) throw err
+  }
+  t.pass('cancel during run resolves and stops job')
+})
+
+test('run | run: second run() throws busy error', { timeout: 600000 }, async t => {
+  const { model } = await setupModel(t)
+  const firstResponse = await model.run(SHORT_PARAMS)
+  let firstError = null
+  if (typeof firstResponse.onError === 'function') {
+    firstResponse.onError(err => { firstError = err })
+  }
+
+  const result = await Promise.race([
+    model.run(SHORT_PARAMS)
+      .then(() => ({ kind: 'no-throw' }))
+      .catch(err => ({ kind: 'busy', err })),
+    firstResponse.await()
+      .then(() => ({ kind: 'first-done' }))
+      .catch(() => ({ kind: 'first-done' }))
+  ])
+
+  if (result.kind === 'busy') {
+    t.ok(
+      /already set or being processed/.test(result.err.message),
+      'second run() throws "already set or being processed"'
+    )
+  } else if (result.kind === 'first-done') {
+    t.comment('First job finished before second run() was rejected; skipping concurrency assertion')
+    t.pass('first job completed (concurrency assertion skipped)')
+  } else {
+    t.fail('second run() should have thrown busy error while first job was still active')
+  }
+
+  const images = []
+  await firstResponse.onUpdate(data => {
+    if (data instanceof Uint8Array) images.push(data)
+  }).await()
+  t.ok(images.length > 0, 'first response completes with output')
+  t.ok(!firstError, 'first response did not fail')
+})
+
+test('cancel | run: can run again after cancel', { timeout: 600000 }, async t => {
+  const { model } = await setupModel(t)
+
+  // Start a job and cancel after first progress tick
+  const response1 = await model.run(SHORT_PARAMS)
+  let cancelFired = false
+  const chain1 = response1.onUpdate(async data => {
+    if (cancelFired) return
+    if (typeof data === 'string') {
+      cancelFired = true
+      await model.cancel()
+    }
+  })
+  // Wait for the cancelled job to fully settle (resolve or reject)
+  await chain1.await().catch(err => {
+    if (!/cancel|aborted|stopp?ed/i.test(err?.message || '')) throw err
+  })
+
+  // Should be able to run again
+  const response2 = await model.run(SHORT_PARAMS)
+  const images = []
+  await response2.onUpdate(data => {
+    if (data instanceof Uint8Array) images.push(data)
+  }).await()
+
+  t.ok(images.length > 0, 'can run again after cancel')
+})
+
+// Schedules a no-op 500 ms timer right after this module is evaluated, keeping the event loop alive briefly.
+// NOTE(review): meant to stop C++ destructors racing async cleanup, but it fires at load time, not after the tests — confirm it still helps.
+setImmediate(() => {
+  setTimeout(() => {}, 500)
+})
diff --git a/packages/lib-infer-diffusion/test/integration/generate-image-flux2.test.js b/packages/lib-infer-diffusion/test/integration/generate-image-flux2.test.js
new file mode 100644
index 0000000000..6b76b9d3bd
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/generate-image-flux2.test.js
@@ -0,0 +1,161 @@
+'use strict'
+
+const fs = require('bare-fs')
+const path = require('bare-path')
+const os = require('bare-os')
+const test = require('brittle')
+const binding = require('../../binding')
+const ImgStableDiffusion = require('../../index')
+const {
+  ensureModel,
+  detectPlatform,
+  setupJsLogger,
+  isPng
+} = require('./utils')
+
+const proc = require('bare-process')
+
+const platform = detectPlatform()
+const isDarwinX64 = os.platform() === 'darwin' && os.arch() === 'x64'
+const isLinuxArm64 = os.platform() === 'linux' && os.arch() === 'arm64'
+const isMobile = os.platform() === 'ios' || os.platform() === 'android'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+const skip = isMobile || noGpu
+
+const DIFFUSION_MODEL = {
+  name: 'flux-2-klein-4b-Q8_0.gguf',
+  url: 'https://huggingface.co/leejet/FLUX.2-klein-4B-GGUF/resolve/main/flux-2-klein-4b-Q8_0.gguf'
+}
+
+const LLM_MODEL = {
+  name: 'Qwen3-4B-Q4_K_M.gguf',
+  url: 'https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_K_M.gguf'
+}
+
+const VAE_MODEL = {
+  name: 'flux2-vae.safetensors',
+  url: 'https://huggingface.co/Comfy-Org/vae-text-encorder-for-flux-klein-4b/resolve/main/split_files/vae/flux2-vae.safetensors'
+}
+
+test('FLUX.2 klein txt2img — generates a valid PNG image', { timeout: 1800000, skip }, async (t) => {
+  setupJsLogger(binding)
+  // Three files are fetched: the diffusion weights, the model passed as `llmModel`, and the VAE.
+  const [downloadedModelName, modelDir] = await ensureModel({
+    modelName: DIFFUSION_MODEL.name,
+    downloadUrl: DIFFUSION_MODEL.url
+  })
+
+  await ensureModel({
+    modelName: LLM_MODEL.name,
+    downloadUrl: LLM_MODEL.url
+  })
+
+  await ensureModel({
+    modelName: VAE_MODEL.name,
+    downloadUrl: VAE_MODEL.url
+  })
+
+  console.log('\n' + '='.repeat(60))
+  console.log('FLUX.2 [klein] 4B — INTEGRATION TEST')
+  console.log('='.repeat(60))
+  console.log(` Platform  : ${platform}`)
+  console.log(` Model     : ${downloadedModelName}`)
+  console.log(` LLM       : ${LLM_MODEL.name}`)
+  console.log(` VAE       : ${VAE_MODEL.name}`)
+  console.log(` Models dir: ${modelDir}`)
+
+  const modelPath = path.join(modelDir, downloadedModelName)
+  t.ok(fs.existsSync(modelPath), 'Model file exists on disk')
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: modelDir,
+      modelName: downloadedModelName,
+      llmModel: LLM_MODEL.name,
+      vaeModel: VAE_MODEL.name
+    },
+    {
+      threads: 4,
+      device: useCpu ? 'cpu' : 'gpu'
+    }
+  )
+
+  const images = []
+  const progressTicks = []
+
+  try {
+    // ── Load ─────────────────────────────────────────────────────────────────
+    console.log('\n=== Loading model ===')
+    const tLoad = Date.now()
+    await model.load()
+    const loadMs = Date.now() - tLoad
+    console.log(`Loaded in ${(loadMs / 1000).toFixed(1)}s`)
+    t.ok(loadMs < 120000, `Model loaded within 120s (took ${(loadMs / 1000).toFixed(1)}s)`)
+
+    // ── Generate ──────────────────────────────────────────────────────────────
+    console.log('\n=== Generating image ===')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: 'a red fox in a snowy forest, photorealistic',
+      steps: 10,
+      width: 512,
+      height: 512,
+      guidance: 3.5,
+      seed: 42
+    })
+    // Uint8Array updates are kept as images; string updates that parse to JSON with step/total are kept as ticks.
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              progressTicks.push(tick)
+            }
+          } catch (_) {} // strings that are not valid JSON are ignored
+        }
+      })
+      .await()
+
+    const genMs = Date.now() - tGen
+    console.log(`\nGenerated in ${(genMs / 1000).toFixed(1)}s`)
+
+    // ── Assertions ────────────────────────────────────────────────────────────
+    t.ok(progressTicks.length > 0, `Received progress ticks (got ${progressTicks.length})`)
+    t.is(progressTicks[progressTicks.length - 1].total, 10, 'Final progress tick reports 10 total steps') // NOTE(review): throws TypeError when no tick arrived
+
+    t.is(images.length, 1, 'Received exactly 1 image')
+
+    const img = images[0]
+    t.ok(img instanceof Uint8Array, 'Image is a Uint8Array')
+    t.ok(img.length > 0, `Image is non-empty (${img.length} bytes)`)
+    t.ok(isPng(img), 'Image has valid PNG magic bytes')
+    // Persist the generated PNG into the models directory.
+    const outPath = path.join(modelDir, 'generate-image--flux2-txt2img-seed42.png')
+    fs.writeFileSync(outPath, img)
+    console.log(`\nSaved → ${outPath}`)
+
+    // ── Summary ───────────────────────────────────────────────────────────────
+    console.log('\n' + '='.repeat(60))
+    console.log('TEST SUMMARY')
+    console.log('='.repeat(60))
+    console.log(` Load time   : ${(loadMs / 1000).toFixed(1)}s`)
+    console.log(` Gen time    : ${(genMs / 1000).toFixed(1)}s`)
+    console.log(` Steps ticks : ${progressTicks.length}`)
+    console.log(` Image size  : ${img.length} bytes`)
+    console.log(' PNG valid   : true')
+    console.log('='.repeat(60))
+  } finally { // runs whether the steps above passed or threw: unload the model, then release the logger
+    console.log('\n=== Cleanup ===')
+    await model.unload().catch(() => {})
+    try {
+      binding.releaseLogger()
+    } catch (_) {}
+    console.log('Done.')
+  }
+})
diff --git a/packages/lib-infer-diffusion/test/integration/generate-image-sd3.test.js b/packages/lib-infer-diffusion/test/integration/generate-image-sd3.test.js
new file mode 100644
index 0000000000..92ec2475eb
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/generate-image-sd3.test.js
@@ -0,0 +1,141 @@
+'use strict'
+
+const fs = require('bare-fs')
+const path = require('bare-path')
+const os = require('bare-os')
+const test = require('brittle')
+const binding = require('../../binding')
+const ImgStableDiffusion = require('../../index')
+const {
+  ensureModel,
+  detectPlatform,
+  setupJsLogger,
+  isPng
+} = require('./utils')
+
+const proc = require('bare-process')
+
+const platform = detectPlatform()
+const isDarwinX64 = os.platform() === 'darwin' && os.arch() === 'x64'
+const isLinuxArm64 = os.platform() === 'linux' && os.arch() === 'arm64'
+const isMobile = os.platform() === 'ios' || os.platform() === 'android'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+const skip = isMobile || noGpu
+
+const DEFAULT_MODEL = {
+  name: 'sd3_medium_incl_clips.safetensors',
+  url: 'https://huggingface.co/adamo1139/stable-diffusion-3-medium-ungated/resolve/main/sd3_medium_incl_clips.safetensors'
+}
+
+test('SD3 Medium txt2img — generates a valid PNG image', { timeout: 900000, skip }, async (t) => {
+  setupJsLogger(binding)
+
+  const [downloadedModelName, modelDir] = await ensureModel({
+    modelName: DEFAULT_MODEL.name,
+    downloadUrl: DEFAULT_MODEL.url
+  })
+
+  console.log('\n' + '='.repeat(60))
+  console.log('STABLE DIFFUSION 3 MEDIUM — INTEGRATION TEST')
+  console.log('='.repeat(60))
+  console.log(` Platform  : ${platform}`)
+  console.log(` Model     : ${downloadedModelName}`)
+  console.log(` Models dir: ${modelDir}`)
+
+  const modelPath = path.join(modelDir, downloadedModelName)
+  t.ok(fs.existsSync(modelPath), 'Model file exists on disk')
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: modelDir,
+      modelName: downloadedModelName
+    },
+    {
+      threads: 4,
+      device: useCpu ? 'cpu' : 'gpu',
+      prediction: 'flow',
+      flow_shift: '3.0'
+    }
+  )
+
+  const images = []
+  const progressTicks = []
+
+  try {
+    // ── Load ─────────────────────────────────────────────────────────────────
+    console.log('\n=== Loading model ===')
+    const tLoad = Date.now()
+    await model.load()
+    const loadMs = Date.now() - tLoad
+    console.log(`Loaded in ${(loadMs / 1000).toFixed(1)}s`)
+    t.ok(loadMs < 120000, `Model loaded within 120s (took ${(loadMs / 1000).toFixed(1)}s)`)
+
+    // ── Generate ──────────────────────────────────────────────────────────────
+    console.log('\n=== Generating image ===')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: 'a red fox in a snowy forest, photorealistic',
+      negative_prompt: 'blurry, low quality, watermark',
+      steps: 10,
+      width: 512,
+      height: 512,
+      cfg_scale: 5.0,
+      sampling_method: 'euler',
+      seed: 42
+    })
+
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              progressTicks.push(tick)
+            }
+          } catch (_) {}
+        }
+      })
+      .await()
+
+    const genMs = Date.now() - tGen
+    console.log(`\nGenerated in ${(genMs / 1000).toFixed(1)}s`)
+
+    // ── Assertions ────────────────────────────────────────────────────────────
+    t.ok(progressTicks.length > 0, `Received progress ticks (got ${progressTicks.length})`)
+    t.is(progressTicks[progressTicks.length - 1].total, 10, 'Final progress tick reports 10 total steps')
+
+    t.is(images.length, 1, 'Received exactly 1 image')
+
+    const img = images[0]
+    t.ok(img instanceof Uint8Array, 'Image is a Uint8Array')
+    t.ok(img.length > 0, `Image is non-empty (${img.length} bytes)`)
+    t.ok(isPng(img), 'Image has valid PNG magic bytes')
+
+    const outPath = path.join(modelDir, 'generate-image--sd3-txt2img-seed42.png')
+    fs.writeFileSync(outPath, img)
+    console.log(`\nSaved → ${outPath}`)
+
+    // ── Summary ───────────────────────────────────────────────────────────────
+    console.log('\n' + '='.repeat(60))
+    console.log('TEST SUMMARY')
+    console.log('='.repeat(60))
+    console.log(` Load time   : ${(loadMs / 1000).toFixed(1)}s`)
+    console.log(` Gen time    : ${(genMs / 1000).toFixed(1)}s`)
+    console.log(` Steps ticks : ${progressTicks.length}`)
+    console.log(` Image size  : ${img.length} bytes`)
+    console.log(' PNG valid   : true')
+    console.log('='.repeat(60))
+  } finally {
+    console.log('\n=== Cleanup ===')
+    await model.unload().catch(() => {})
+    try {
+      binding.releaseLogger()
+    } catch (_) {}
+    console.log('Done.')
+  }
+})
diff --git a/packages/lib-infer-diffusion/test/integration/generate-image-sdxl.test.js b/packages/lib-infer-diffusion/test/integration/generate-image-sdxl.test.js
new file mode 100644
index 0000000000..3985c26633
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/generate-image-sdxl.test.js
@@ -0,0 +1,139 @@
+'use strict'
+
+const fs = require('bare-fs')
+const path = require('bare-path')
+const os = require('bare-os')
+const test = require('brittle')
+const binding = require('../../binding')
+const ImgStableDiffusion = require('../../index')
+const {
+  ensureModel,
+  detectPlatform,
+  setupJsLogger,
+  isPng
+} = require('./utils')
+
+const proc = require('bare-process')
+
+const platform = detectPlatform()
+const isDarwinX64 = os.platform() === 'darwin' && os.arch() === 'x64'
+const isLinuxArm64 = os.platform() === 'linux' && os.arch() === 'arm64'
+const isMobile = os.platform() === 'ios' || os.platform() === 'android'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+const skip = isMobile || noGpu
+
+const DEFAULT_MODEL = {
+  name: 'stable-diffusion-xl-base-1.0-Q4_0.gguf',
+  url: 'https://huggingface.co/gpustack/stable-diffusion-xl-base-1.0-GGUF/resolve/main/stable-diffusion-xl-base-1.0-Q4_0.gguf'
+}
+
+test('SDXL txt2img — generates a valid PNG image', { timeout: 900000, skip }, async (t) => {
+  setupJsLogger(binding)
+
+  const [downloadedModelName, modelDir] = await ensureModel({
+    modelName: DEFAULT_MODEL.name,
+    downloadUrl: DEFAULT_MODEL.url
+  })
+
+  console.log('\n' + '='.repeat(60))
+  console.log('STABLE DIFFUSION XL BASE 1.0 — INTEGRATION TEST')
+  console.log('='.repeat(60))
+  console.log(` Platform  : ${platform}`)
+  console.log(` Model     : ${downloadedModelName}`)
+  console.log(` Models dir: ${modelDir}`)
+
+  const modelPath = path.join(modelDir, downloadedModelName)
+  t.ok(fs.existsSync(modelPath), 'Model file exists on disk')
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: modelDir,
+      modelName: downloadedModelName
+    },
+    {
+      threads: 4,
+      device: useCpu ? 'cpu' : 'gpu'
+
+    }
+  )
+
+  const images = []
+  const progressTicks = []
+
+  try {
+    // ── Load ─────────────────────────────────────────────────────────────────
+    console.log('\n=== Loading model ===')
+    const tLoad = Date.now()
+    await model.load()
+    const loadMs = Date.now() - tLoad
+    console.log(`Loaded in ${(loadMs / 1000).toFixed(1)}s`)
+    t.ok(loadMs < 120000, `Model loaded within 120s (took ${(loadMs / 1000).toFixed(1)}s)`)
+
+    // ── Generate ──────────────────────────────────────────────────────────────
+    console.log('\n=== Generating image ===')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: 'a red fox in a snowy forest, photorealistic',
+      negative_prompt: 'blurry, low quality, watermark',
+      steps: 10,
+      width: 1024,
+      height: 1024,
+      cfg_scale: 6.5,
+      seed: 15
+    })
+
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              progressTicks.push(tick)
+            }
+          } catch (_) {}
+        }
+      })
+      .await()
+
+    const genMs = Date.now() - tGen
+    console.log(`\nGenerated in ${(genMs / 1000).toFixed(1)}s`)
+
+    // ── Assertions ────────────────────────────────────────────────────────────
+    t.ok(progressTicks.length > 0, `Received progress ticks (got ${progressTicks.length})`)
+    t.is(progressTicks[progressTicks.length - 1].total, 10, 'Final progress tick reports 10 total steps')
+
+    t.is(images.length, 1, 'Received exactly 1 image')
+
+    const img = images[0]
+    t.ok(img instanceof Uint8Array, 'Image is a Uint8Array')
+    t.ok(img.length > 0, `Image is non-empty (${img.length} bytes)`)
+    t.ok(isPng(img), 'Image has valid PNG magic bytes')
+
+    const outPath = path.join(modelDir, 'generate-image--sdxl-txt2img-seed15.png')
+    fs.writeFileSync(outPath, img)
+    console.log(`\nSaved → ${outPath}`)
+
+    // ── Summary ───────────────────────────────────────────────────────────────
+    console.log('\n' + '='.repeat(60))
+    console.log('TEST SUMMARY')
+    console.log('='.repeat(60))
+    console.log(` Load time   : ${(loadMs / 1000).toFixed(1)}s`)
+    console.log(` Gen time    : ${(genMs / 1000).toFixed(1)}s`)
+    console.log(` Steps ticks : ${progressTicks.length}`)
+    console.log(` Image size  : ${img.length} bytes`)
+    console.log(' PNG valid   : true')
+    console.log('='.repeat(60))
+  } finally {
+    console.log('\n=== Cleanup ===')
+    await model.unload().catch(() => {})
+    try {
+      binding.releaseLogger()
+    } catch (_) {}
+    console.log('Done.')
+  }
+})
diff --git a/packages/lib-infer-diffusion/test/integration/generate-image.test.js b/packages/lib-infer-diffusion/test/integration/generate-image.test.js
new file mode 100644
index 0000000000..3cc175a188
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/generate-image.test.js
@@ -0,0 +1,140 @@
+'use strict'
+
+const fs = require('bare-fs')
+const path = require('bare-path')
+const os = require('bare-os')
+const proc = require('bare-process')
+const test = require('brittle')
+const binding = require('../../binding')
+const ImgStableDiffusion = require('../../index')
+const {
+  ensureModel,
+  detectPlatform,
+  setupJsLogger,
+  isPng
+} = require('./utils')
+
+const platform = detectPlatform()
+const isDarwinX64 = os.platform() === 'darwin' && os.arch() === 'x64'
+const isLinuxArm64 = os.platform() === 'linux' && os.arch() === 'arm64'
+const isMobile = os.platform() === 'ios' || os.platform() === 'android'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+const skip = isMobile || noGpu
+
+const DEFAULT_MODEL = {
+  name: 'stable-diffusion-v2-1-Q8_0.gguf',
+  url: 'https://huggingface.co/gpustack/stable-diffusion-v2-1-GGUF/resolve/main/stable-diffusion-v2-1-Q8_0.gguf'
+}
+
+test('SD2.1 txt2img — generates a valid PNG image', { timeout: 600000, skip }, async (t) => {
+  setupJsLogger(binding)
+
+  const [downloadedModelName, modelDir] = await ensureModel({
+    modelName: DEFAULT_MODEL.name,
+    downloadUrl: DEFAULT_MODEL.url
+  })
+
+  console.log('\n' + '='.repeat(60))
+  console.log('STABLE DIFFUSION 2.1 — INTEGRATION TEST')
+  console.log('='.repeat(60))
+  console.log(` Platform  : ${platform}`)
+  console.log(` Model     : ${downloadedModelName}`)
+  console.log(` Models dir: ${modelDir}`)
+
+  const modelPath = path.join(modelDir, downloadedModelName)
+  t.ok(fs.existsSync(modelPath), 'Model file exists on disk')
+
+  const model = new ImgStableDiffusion(
+    {
+      logger: console,
+      diskPath: modelDir,
+      modelName: downloadedModelName
+    },
+    {
+      threads: 4,
+      device: useCpu ? 'cpu' : 'gpu',
+      prediction: 'v' // SD2.1 uses v-prediction
+    }
+  )
+
+  const images = []
+  const progressTicks = []
+
+  try {
+    // ── Load ─────────────────────────────────────────────────────────────────
+    console.log('\n=== Loading model ===')
+    const tLoad = Date.now()
+    await model.load()
+    const loadMs = Date.now() - tLoad
+    console.log(`Loaded in ${(loadMs / 1000).toFixed(1)}s`)
+    t.ok(loadMs < 120000, `Model loaded within 120s (took ${(loadMs / 1000).toFixed(1)}s)`)
+
+    // ── Generate ──────────────────────────────────────────────────────────────
+    console.log('\n=== Generating image ===')
+    const tGen = Date.now()
+
+    const response = await model.run({
+      prompt: 'a red fox in a snowy forest, photorealistic',
+      negative_prompt: 'blurry, low quality, watermark',
+      steps: 10,
+      width: 712,
+      height: 712,
+      cfg_scale: 7.5,
+      seed: 42 // fixed seed for reproducibility
+    })
+
+    await response
+      .onUpdate((data) => {
+        if (data instanceof Uint8Array) {
+          images.push(data)
+        } else if (typeof data === 'string') {
+          try {
+            const tick = JSON.parse(data)
+            if ('step' in tick && 'total' in tick) {
+              progressTicks.push(tick)
+            }
+          } catch (_) {}
+        }
+      })
+      .await()
+
+    const genMs = Date.now() - tGen
+    console.log(`\nGenerated in ${(genMs / 1000).toFixed(1)}s`)
+
+    // ── Assertions ────────────────────────────────────────────────────────────
+    t.ok(progressTicks.length > 0, `Received progress ticks (got ${progressTicks.length})`)
+    t.is(progressTicks[progressTicks.length - 1].total, 10, 'Final progress tick reports 10 total steps')
+
+    t.is(images.length, 1, 'Received exactly 1 image')
+
+    const img = images[0]
+    t.ok(img instanceof Uint8Array, 'Image is a Uint8Array')
+    t.ok(img.length > 0, `Image is non-empty (${img.length} bytes)`)
+    t.ok(isPng(img), 'Image has valid PNG magic bytes')
+
+    // Save output for CI artifact upload — filename encodes test origin
+    // Saved to modelDir so mobile has write permission to the same path
+    const outPath = path.join(modelDir, 'generate-image--sd2-txt2img-seed42.png')
+    fs.writeFileSync(outPath, img)
+    console.log(`\nSaved → ${outPath}`)
+
+    // ── Summary ───────────────────────────────────────────────────────────────
+    console.log('\n' + '='.repeat(60))
+    console.log('TEST SUMMARY')
+    console.log('='.repeat(60))
+    console.log(` Load time   : ${(loadMs / 1000).toFixed(1)}s`)
+    console.log(` Gen time    : ${(genMs / 1000).toFixed(1)}s`)
+    console.log(` Steps ticks : ${progressTicks.length}`)
+    console.log(` Image size  : ${img.length} bytes`)
+    console.log(' PNG valid   : true')
+    console.log('='.repeat(60))
+  } finally {
+    console.log('\n=== Cleanup ===')
+    await model.unload()
+    try {
+      binding.releaseLogger()
+    } catch (_) {}
+    console.log('Done.')
+  }
+})
diff --git a/packages/lib-infer-diffusion/test/integration/model-loading.test.js b/packages/lib-infer-diffusion/test/integration/model-loading.test.js
new file mode 100644
index 0000000000..6adef92070
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/model-loading.test.js
@@ -0,0 +1,55 @@
+'use strict'
+
+const test = require('brittle')
+const os = require('bare-os')
+const proc = require('bare-process')
+
+const ImgStableDiffusion = require('../../index.js')
+const { ensureModel } = require('./utils')
+
+const platform = os.platform()
+const arch = os.arch()
+const isDarwinX64 = platform === 'darwin' && arch === 'x64'
+const isLinuxArm64 = platform === 'linux' && arch === 'arm64'
+const noGpu = proc.env && proc.env.NO_GPU === 'true'
+const useCpu = isDarwinX64 || isLinuxArm64 || noGpu
+
+const DEFAULT_MODEL = {
+  name: 'stable-diffusion-v2-1-Q8_0.gguf',
+  url: 'https://huggingface.co/gpustack/stable-diffusion-v2-1-GGUF/resolve/main/stable-diffusion-v2-1-Q8_0.gguf'
+}
+
+test('model loading - load and unload', { timeout: 600_000 }, async t => {
+  const [downloadedModelName, modelDir] = await ensureModel({
+    modelName: DEFAULT_MODEL.name,
+    downloadUrl: DEFAULT_MODEL.url
+  })
+  // Exercise one load → unload cycle, then call unload a second time
+  const config = {
+    threads: '4', // a string here, whereas the generate-image tests pass the number 4
+    device: useCpu ? 'cpu' : 'gpu',
+    prediction: 'v'
+  }
+
+  const addon = new ImgStableDiffusion({
+    modelName: downloadedModelName,
+    diskPath: modelDir,
+    logger: console
+  }, config)
+
+  await addon.load()
+  t.pass('model loaded successfully')
+
+  await addon.unload()
+  t.pass('model unloaded successfully')
+
+  await addon.unload().catch(() => {}) // NOTE(review): a rejection is swallowed here, so the pass below does not prove idempotence
+  t.pass('second unload is idempotent')
+})
+
+// Keep event loop alive briefly to let pending async operations complete
+// This prevents C++ destructors from running while async cleanup is still happening
+// which can cause segfaults (exit code 139)
+setImmediate(() => {
+  setTimeout(() => {}, 500)
+})
diff --git a/packages/lib-infer-diffusion/test/integration/utils.js b/packages/lib-infer-diffusion/test/integration/utils.js
new file mode 100644
index 0000000000..c4959d2b56
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/integration/utils.js
@@ -0,0 +1,199 @@
+'use strict'
+const fs = require('bare-fs')
+const path = require('bare-path')
+const https = require('bare-https')
+const os = require('bare-os')
+
+async function downloadFile (url, dest) {
+  return new Promise((resolve, reject) => {
+    let resolved = false
+    const safeResolve = () => {
+      if (!resolved) {
+        resolved = true
+        resolve()
+      }
+    }
+    const safeReject = (err) => {
+      if (!resolved) {
+        resolved = true
+        reject(err)
+      }
+    }
+
+    const file = fs.createWriteStream(dest)
+
+    file.on('error', (err) => {
+      file.destroy()
+      fs.unlink(dest, () => safeReject(err))
+    })
+
+    const req = https.request(url, response => {
+      // Handle redirects (added 307, 308 for Windows model download)
+      if ([301, 302, 307, 308].includes(response.statusCode)) {
+        file.destroy()
+        // Wait for unlink to complete before recursive call (fixes Windows race condition)
+        fs.unlink(dest, (unlinkErr) => {
+          // Ignore ENOENT - file may not exist yet
+          if (unlinkErr && unlinkErr.code !== 'ENOENT') {
+            return safeReject(unlinkErr)
+          }
+
+          let redirectUrl = response.headers.location
+          // Handle relative redirects
+          if (redirectUrl.startsWith('/')) {
+            const originalUrl = new URL(url)
+            redirectUrl = `${originalUrl.protocol}//${originalUrl.host}${redirectUrl}`
+          }
+
+          downloadFile(redirectUrl, dest)
+            .then(safeResolve)
+            .catch(safeReject)
+        })
+        return
+      }
+
+      if (response.statusCode !== 200) {
+        file.destroy()
+        fs.unlink(dest, () => safeReject(new Error(`Download failed: HTTP ${response.statusCode} from ${url}`)))
+        return
+      }
+
+      response.on('error', (err) => {
+        file.destroy()
+        fs.unlink(dest, () => safeReject(err))
+      })
+
+      response.pipe(file)
+
+      // Wait for 'close' event to ensure data is fully flushed to disk (important on Windows)
+      file.on('close', () => {
+        safeResolve()
+      })
+    })
+
+    req.on('error', err => {
+      file.destroy()
+      fs.unlink(dest, () => safeReject(err))
+    })
+
+    req.end()
+  })
+}
+
+async function ensureModel ({ modelName, downloadUrl }) {
+  const modelDir = path.resolve(__dirname, '../model')
+
+  const modelPath = path.join(modelDir, modelName)
+
+  if (fs.existsSync(modelPath)) {
+    return [modelName, modelDir]
+  }
+
+  fs.mkdirSync(modelDir, { recursive: true })
+  console.log(`Downloading test model ${modelName}...`)
+
+  await downloadFile(downloadUrl, modelPath)
+
+  const stats = fs.statSync(modelPath)
+  console.log(`Model ready: ${(stats.size / 1024 / 1024).toFixed(1)}MB`)
+  return [modelName, modelDir]
+}
+
+async function ensureModelPath ({ modelName, downloadUrl }) {
+  const [downloadedModelName, modelDir] = await ensureModel({ modelName, downloadUrl })
+  return path.join(modelDir, downloadedModelName)
+}
+
+/**
+ * Get path to a media file - works on both desktop and mobile
+ * On mobile, media files must be in testAssets/
+ * On desktop, media files are in addon root /media/
+ */
+function getMediaPath (filename) {
+  const isMobile = os.platform() === 'ios' || os.platform() === 'android'
+  if (isMobile && global.assetPaths) {
+    const projectPath = `../../testAssets/${filename}`
+
+    if (global.assetPaths[projectPath]) {
+      const resolvedPath = global.assetPaths[projectPath].replace('file://', '')
+      return resolvedPath
+    }
+    throw new Error(`Asset not found in testAssets: ${filename}. Make sure ${filename} is in testAssets/ directory and rebuild the app.`)
+  }
+
+  return path.resolve(__dirname, '../../media', filename)
+}
+
+/**
+ * Factory to create a shared onOutput handler for image generation.
+ */
+function makeOutputCollector (t, logger = console) {
+  const outputData = {}
+  let jobCompleted = false
+  let generatedData = null
+  let stats = null
+
+  function onOutput (addon, event, jobId, output, error) {
+    if (event === 'Output') {
+      if (!outputData[jobId]) {
+        outputData[jobId] = []
+      }
+      outputData[jobId].push(output)
+      generatedData = output
+    } else if (event === 'Error') {
+      t.fail(`Job ${jobId} error: ${error}`)
+    } else if (event === 'JobEnded') {
+      stats = output
+      logger.log(`Job ${jobId} completed.`)
+      if (stats) {
+        logger.log(`Job ${jobId} stats: ${JSON.stringify(stats)}`)
+      }
+      jobCompleted = true
+    }
+  }
+
+  return {
+    onOutput,
+    outputData,
+    get generatedData () { return generatedData },
+    get jobCompleted () { return jobCompleted },
+    get stats () { return stats }
+  }
+}
+
+function detectPlatform () {
+  return `${os.platform()}-${os.arch()}`
+}
+
+function setupJsLogger (binding) {
+  const LOG_PRIORITIES = ['ERROR', 'WARNING', 'INFO', 'DEBUG']
+  binding.setLogger((priority, message) => {
+    const label = LOG_PRIORITIES[priority] || `UNKNOWN(${priority})`
+    console.log(`[C++ ${label}] ${message}`)
+  })
+  return binding
+}
+
+function isPng (buf) {
+  if (!buf || buf.length < 8) return false
+  return (
+    buf[0] === 0x89 &&
+    buf[1] === 0x50 &&
+    buf[2] === 0x4E &&
+    buf[3] === 0x47 &&
+    buf[4] === 0x0D &&
+    buf[5] === 0x0A &&
+    buf[6] === 0x1A &&
+    buf[7] === 0x0A
+  )
+}
+
+module.exports = {
+  ensureModel,
+  ensureModelPath,
+  getMediaPath,
+  makeOutputCollector,
+  detectPlatform,
+  setupJsLogger,
+  isPng
+}
diff --git a/packages/lib-infer-diffusion/test/mobile/README.md b/packages/lib-infer-diffusion/test/mobile/README.md
new file mode 100644
index 0000000000..0898e9659a
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/mobile/README.md
@@ -0,0 +1,67 @@
+# Mobile Testing for Stable Diffusion
+
+This directory contains the mobile test configuration for the `@qvac/diffusion-cpp` addon.
+
+> **Note**: This test directory is included in the published npm package to support the mobile testing framework. These test files are NOT part of the public API and should only be used by the internal mobile testing infrastructure.
+
+## Test Structure
+
+- `integration-runtime.cjs` - Shared runtime that provides `runIntegrationModule` global
+- `integration.auto.cjs` - Auto-generated wrappers for each integration test
+- `testAssets/` - Directory for model files and test data
+
+## Setup
+
+### Download Test Model
+
+The test requires a Stable Diffusion model file. Download it to the `testAssets` directory:
+
+```bash
+cd test/mobile/testAssets
+
+# Download a quantized SD model
+curl -L -o sd-v1-4-Q4_0.gguf <model-url>
+```
+
+## Running the Test
+
+From the mobile tester app root:
+
+```bash
+# Build the test app with diffusion-cpp
+npm run build ../lib-infer-diffusion
+
+# Run on Android
+npm run android
+
+# Run on iOS
+npm run ios
+```
+
+## Regenerating Tests
+
+After adding or removing integration test files:
+
+```bash
+npm run test:mobile:generate
+```
+
+To validate that auto-generated tests are in sync:
+
+```bash
+npm run test:mobile:validate
+```
+
+## Troubleshooting
+
+### Model file not found
+- Ensure the model file is in the `testAssets/` directory
+- Check that the file downloaded completely
+
+### Out of memory
+- SD models are larger than LLM models; use quantized (Q4) variants for testing
+- Close other apps to free memory
+
+### Timeout errors
+- Image generation can be slow on mobile devices
+- The SD2.1 tests allow up to 600 seconds; the SD3 and SDXL tests allow up to 900 seconds
diff --git a/packages/lib-infer-diffusion/test/mobile/integration-runtime.cjs b/packages/lib-infer-diffusion/test/mobile/integration-runtime.cjs
new file mode 100644
index 0000000000..4376bce841
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/mobile/integration-runtime.cjs
@@ -0,0 +1,20 @@
+'use strict'
+
+const path = require('bare-path')
+const fs = require('bare-fs')
+const { pathToFileURL } = require('bare-url')
+
+async function runIntegrationModule (relativeModulePath, options = {}) {
+  const modulePath = path.join(__dirname, relativeModulePath)
+
+  if (!fs.existsSync(modulePath)) {
+    console.warn(`[integration-runner] Missing module: ${relativeModulePath}`)
+    return 'missing'
+  }
+
+  const moduleUrl = pathToFileURL(modulePath).href
+  await import(moduleUrl)
+  return modulePath
+}
+
+global.runIntegrationModule = runIntegrationModule
diff --git a/packages/lib-infer-diffusion/test/mobile/integration.auto.cjs b/packages/lib-infer-diffusion/test/mobile/integration.auto.cjs
new file mode 100644
index 0000000000..046e5e317c
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/mobile/integration.auto.cjs
@@ -0,0 +1,31 @@
+'use strict'
+require('./integration-runtime.cjs')
+
+// AUTO-GENERATED FILE. Run `npm run test:mobile:generate` to update.
+// Each function mirrors a single file under test/integration/.
+
+/* global runIntegrationModule */
+
+async function runApiBehaviorTest (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/api-behavior.test.js', options)
+}
+
+async function runGenerateImageFlux2Test (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/generate-image-flux2.test.js', options)
+}
+
+async function runGenerateImageSd3Test (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/generate-image-sd3.test.js', options)
+}
+
+async function runGenerateImageSdxlTest (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/generate-image-sdxl.test.js', options)
+}
+
+async function runGenerateImageTest (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/generate-image.test.js', options)
+}
+
+async function runModelLoadingTest (options = {}) { // eslint-disable-line no-unused-vars
+  return runIntegrationModule('../integration/model-loading.test.js', options)
+}
diff --git a/packages/lib-infer-diffusion/test/mobile/testAssets/.gitignore b/packages/lib-infer-diffusion/test/mobile/testAssets/.gitignore
new file mode 100644
index 0000000000..18f8dd90ad
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/mobile/testAssets/.gitignore
@@ -0,0 +1,8 @@
+# Ignore model files (can be large)
+*.gguf
+*.bin
+*.safetensors
+*.ckpt
+
+# Keep this directory in git
+!.gitignore
diff --git a/packages/lib-infer-diffusion/test/unit/CMakeLists.txt b/packages/lib-infer-diffusion/test/unit/CMakeLists.txt
new file mode 100644
index 0000000000..a2a5a0a96e
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/CMakeLists.txt
@@ -0,0 +1,120 @@
+# This file is included as a subdirectory from the main CMakeLists.txt
+# GTest is already found in the parent, but we can find it here too for clarity
+find_package(GTest CONFIG REQUIRED)
+
+# ENABLE_COVERAGE option should be set in parent CMakeLists.txt
+# If not set, default to OFF
+if(NOT DEFINED ENABLE_COVERAGE)
+  set(ENABLE_COVERAGE OFF)
+endif()
+
+add_executable(
+  addon-test
+  test_backend_selection.cpp
+  test_sd_model.cpp
+  test_model_loading.cpp
+  test_single_step_inference.cpp
+  test_full_generation.cpp
+  test_cancel_context.cpp
+  test_sd_gen_handlers.cpp
+  ${CMAKE_SOURCE_DIR}/addon/src/utils/BackendSelection.cpp
+  ${CMAKE_SOURCE_DIR}/addon/src/utils/LoggingMacros.cpp
+  ${CMAKE_SOURCE_DIR}/addon/src/model-interface/SdModel.cpp
+  ${CMAKE_SOURCE_DIR}/addon/src/handlers/SdCtxHandlers.cpp
+  ${CMAKE_SOURCE_DIR}/addon/src/handlers/SdGenHandlers.cpp
+)
+
+target_compile_options(
+  addon-test
+  PRIVATE
+    -Wno-deprecated
+    -Wfatal-errors
+    -g)
+
+if(NOT WIN32)
+  target_compile_options(
+    addon-test
+    PRIVATE
+      -fno-omit-frame-pointer
+  )
+endif()
+
+target_include_directories(
+  addon-test
+  PRIVATE
+  ${CMAKE_SOURCE_DIR}/addon/src
+  ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+)
+
+# Set C++20 standard for std::views support (required by qvac-lib-inference-addon-cpp)
+target_compile_features(addon-test PRIVATE cxx_std_20)
+
+# Set macOS deployment target for C++20 std::format support
+if(APPLE AND NOT IOS)
+  set_target_properties(addon-test PROPERTIES
+    XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "14.0"
+    OSX_DEPLOYMENT_TARGET "14.0"
+  )
+endif()
+
+# Define test binary directory path for runtime use
+get_filename_component(TEST_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}" ABSOLUTE)
+target_compile_definitions(addon-test PRIVATE
+  TEST_BINARY_DIR="${TEST_BINARY_DIR}"
+  PROJECT_ROOT="${CMAKE_SOURCE_DIR}"
+)
+
+# Optional coverage instrumentation
+if(ENABLE_COVERAGE)
+  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    message(STATUS "ENABLING_PROFILING")
+    # Source-based coverage: these flags exist only in Clang/AppleClang (GCC rejects them)
+    target_compile_options(addon-test PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+    target_link_options(addon-test PRIVATE -fprofile-instr-generate)
+  else()
+    message(WARNING "ENABLE_COVERAGE is only supported with Clang in this script.")
+  endif()
+endif()
+
+target_link_libraries(
+  addon-test
+  PRIVATE
+    stable-diffusion::stable-diffusion
+    ggml::ggml
+    GTest::gtest
+    GTest::gtest_main
+    GTest::gmock_main
+)
+
+if(NOT WIN32 AND NOT APPLE)
+  target_link_libraries(
+    addon-test
+    PRIVATE
+      -fsanitize=address
+  )
+endif()
+
+if(WIN32)
+  target_link_libraries(
+    addon-test
+    PRIVATE
+      msvcrt.lib
+  )
+endif()
+
+# Copy ggml backend libraries next to the test executable
+if((ANDROID OR UNIX) AND NOT APPLE)
+  foreach(_backend ${GGML_AVAILABLE_BACKENDS})
+    add_custom_command(TARGET addon-test POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        $<TARGET_FILE:ggml::${_backend}>
+        ${CMAKE_CURRENT_BINARY_DIR}
+      COMMENT "Copying ggml backend ${_backend} to test directory"
+    )
+  endforeach()
+endif()
+
+add_test(NAME SdModelTests COMMAND addon-test)
+set_tests_properties(SdModelTests PROPERTIES
+  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+  TIMEOUT 900)  # 15 minutes
diff --git a/packages/lib-infer-diffusion/test/unit/test_backend_selection.cpp b/packages/lib-infer-diffusion/test/unit/test_backend_selection.cpp
new file mode 100644
index 0000000000..2c0b15c9cf
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_backend_selection.cpp
@@ -0,0 +1,38 @@
+#include <string>
+#include <unordered_map>
+
+#include <gtest/gtest.h>
+
+#include "utils/BackendSelection.hpp"
+
+using namespace sd_backend_selection;
+
+class SdBackendSelectionTest : public ::testing::Test {
+protected:
+  std::unordered_map<std::string, std::string> configMap;
+
+  void SetUp() override { configMap.clear(); }
+};
+
+TEST_F(SdBackendSelectionTest, DeviceGpuReturnsGPU) {
+  configMap["device"] = "gpu";
+  EXPECT_EQ(preferredDeviceFromMap(configMap), BackendDevice::GPU);
+}
+
+TEST_F(SdBackendSelectionTest, DeviceCpuReturnsCPU) {
+  configMap["device"] = "cpu";
+  EXPECT_EQ(preferredDeviceFromMap(configMap), BackendDevice::CPU);
+}
+
+TEST_F(SdBackendSelectionTest, MissingDeviceDefaultsToGPU) {
+  EXPECT_EQ(preferredDeviceFromMap(configMap), BackendDevice::GPU);
+}
+
+TEST_F(SdBackendSelectionTest, ThreadsFromMapReturnsValue) {
+  configMap["threads"] = "8";
+  EXPECT_EQ(threadsFromMap(configMap), 8);
+}
+
+TEST_F(SdBackendSelectionTest, ThreadsFromMapDefaultsToAuto) {
+  EXPECT_EQ(threadsFromMap(configMap), -1);
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_cancel_context.cpp b/packages/lib-infer-diffusion/test/unit/test_cancel_context.cpp
new file mode 100644
index 0000000000..1b42d1a3fb
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_cancel_context.cpp
@@ -0,0 +1,251 @@
+#include <any>
+#include <atomic>
+#include <chrono>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "model-interface/SdModel.hpp"
+#include "test_common.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+
+// ---------------------------------------------------------------------------
+// Test fixture — loads the model once per suite (expensive)
+// ---------------------------------------------------------------------------
+class SdCancelContextTest : public ::testing::Test {
+protected:
+  static std::unique_ptr<SdModel> model;
+
+  static void SetUpTestSuite() {
+    const auto path = sd_test_helpers::getModelPath();
+    if (path.empty())
+      return;
+
+    SdCtxConfig config{};
+    config.modelPath = path;
+    config.prediction = V_PRED;
+    config.nThreads = sd_test_helpers::getTestThreads();
+    config.device = sd_test_helpers::getTestDevice();
+    // freeParamsImmediately defaults to false in our config — this is what
+    // we're testing: the model must survive multiple generations and
+    // cancel-then-rerun without segfaulting.
+
+    model = std::make_unique<SdModel>(std::move(config));
+    model->load();
+  }
+
+  static void TearDownTestSuite() {
+    model.reset(); // destructor releases GPU memory
+  }
+
+  void SetUp() override {
+    if (!model)
+      GTEST_SKIP() << "SD2.1 model not available — set SD_TEST_MODEL_PATH or "
+                      "download to test/model/";
+  }
+
+  // Helper: build a GenerationJob with many steps (gives time to cancel)
+  static SdModel::GenerationJob makeLongJob(
+      std::atomic<int>& progressSteps,
+      std::vector<std::vector<uint8_t>>& images) {
+    SdModel::GenerationJob job;
+    job.paramsJson = R"({
+      "prompt": "a red fox in snow",
+      "steps": 50,
+      "width": 256,
+      "height": 256,
+      "cfg_scale": 7.5,
+      "seed": 42
+    })";
+    job.progressCallback = [&](const std::string&) {
+      progressSteps.fetch_add(1);
+    };
+    job.outputCallback = [&](const std::vector<uint8_t>& png) {
+      images.push_back(png);
+    };
+    return job;
+  }
+
+  // Helper: build a short GenerationJob (quick completion)
+  static SdModel::GenerationJob
+  makeShortJob(std::vector<std::vector<uint8_t>>& images) {
+    SdModel::GenerationJob job;
+    job.paramsJson = R"({
+      "prompt": "solid white",
+      "steps": 2,
+      "width": 256,
+      "height": 256,
+      "cfg_scale": 7.5,
+      "seed": 1
+    })";
+    job.outputCallback = [&](const std::vector<uint8_t>& png) {
+      images.push_back(png);
+    };
+    return job;
+  }
+};
+
+std::unique_ptr<SdModel> SdCancelContextTest::model = nullptr;
+
+// ---------------------------------------------------------------------------
+// 1. Cancel on idle model is safe (no crash, no state corruption)
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, CancelWhenIdleIsNoop) {
+  EXPECT_NO_THROW(model->cancel());
+  // cancel() sets the flag even when idle — it is only cleared on process()
+  // entry.  This is safe: the flag is reset before the next generation begins.
+  EXPECT_TRUE(model->isCancelRequested());
+
+  std::vector<std::vector<uint8_t>> images;
+  auto job = makeShortJob(images);
+  EXPECT_NO_THROW(model->process(std::any(job)));
+  EXPECT_EQ(images.size(), 1u) << "Short job should produce 1 image";
+}
+
+// ---------------------------------------------------------------------------
+// 2. Cancel during generation throws "Job cancelled" (cancel-as-error)
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, CancelDuringGenerationThrowsJobCancelled) {
+  std::atomic<int> progressSteps{0};
+  std::atomic<bool> finished{false}; // set once process() has returned/thrown
+  std::vector<std::vector<uint8_t>> images;
+  auto job = makeLongJob(progressSteps, images);
+
+  // Fire cancel from another thread after the first progress tick.  The waiter
+  // also stops once process() is done, so an early failure cannot hang join().
+  std::thread cancelThread([&] {
+    while (progressSteps.load() < 1 && !finished.load())
+      std::this_thread::sleep_for(std::chrono::milliseconds{5});
+    if (!finished.load())
+      model->cancel();
+  });
+  std::string outcome = "no exception";
+  try {
+    model->process(std::any(job));
+  } catch (const std::runtime_error& e) {
+    outcome = e.what();
+  } catch (...) {
+    outcome = "unexpected exception type";
+  }
+  finished.store(true);
+  cancelThread.join(); // joined on every path, before any assertion runs
+  EXPECT_EQ(outcome, "Job cancelled") << "process() should throw on cancel";
+  EXPECT_EQ(images.size(), 0u) << "Cancelled generation should not emit images";
+}
+
+// ---------------------------------------------------------------------------
+// 3. Model is reusable after cancel (freeParamsImmediately = false)
+//    This is the exact scenario that caused the SIGSEGV before the fix.
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, RunAfterCancelProducesValidOutput) {
+  // First: start and cancel a long job
+  std::atomic<int> progressSteps{0};
+  std::atomic<bool> finished{false}; // set once process() has returned/thrown
+  std::vector<std::vector<uint8_t>> cancelledImages;
+  auto longJob = makeLongJob(progressSteps, cancelledImages);
+  std::thread cancelThread([&] {
+    while (progressSteps.load() < 1 && !finished.load())
+      std::this_thread::sleep_for(std::chrono::milliseconds{5});
+    if (!finished.load())
+      model->cancel();
+  });
+  try {
+    model->process(std::any(longJob));
+  } catch (const std::runtime_error&) { // expected — "Job cancelled"
+  } catch (...) {
+    ADD_FAILURE() << "Unexpected exception type from the cancelled job";
+  }
+  finished.store(true); // lets the waiter exit even if no progress tick arrived
+  cancelThread.join();  // joined on every path; never left joinable
+
+  // Second: run a short job on the same model instance.  Before the fix this
+  // segfaulted: freeParamsImmediately=true freed weight buffers after the first
+  // run, and the compute buffer was freed on the wrong model (diffusion_model
+  // vs work_diffusion_model), corrupting sd_ctx state.
+  std::vector<std::vector<uint8_t>> images;
+  auto shortJob = makeShortJob(images);
+  EXPECT_NO_THROW(model->process(std::any(shortJob)));
+  ASSERT_EQ(images.size(), 1u) << "Rerun after cancel should produce 1 image";
+  EXPECT_TRUE(sd_test_helpers::isPng(images[0])) << "Output must be valid PNG";
+}
+
+// ---------------------------------------------------------------------------
+// 4. Multiple sequential generations work (model reuse without cancel)
+//    Verifies freeParamsImmediately=false doesn't break normal reuse.
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, MultipleSequentialGenerationsSucceed) {
+  for (int i = 0; i < 3; ++i) {
+    std::vector<std::vector<uint8_t>> images;
+    auto job = makeShortJob(images);
+
+    EXPECT_NO_THROW(model->process(std::any(job)))
+        << "Generation " << i << " should not throw";
+    ASSERT_EQ(images.size(), 1u)
+        << "Generation " << i << " should produce 1 image";
+    EXPECT_TRUE(sd_test_helpers::isPng(images[0]))
+        << "Generation " << i << " output must be valid PNG";
+  }
+}
+
+// ---------------------------------------------------------------------------
+// 5. Cancel flag is reset at the start of each process() call
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, CancelFlagResetOnProcessEntry) {
+  // Set cancel flag manually
+  model->cancel();
+  ASSERT_TRUE(model->isCancelRequested());
+
+  // process() should reset it at entry, then run normally
+  std::vector<std::vector<uint8_t>> images;
+  auto job = makeShortJob(images);
+
+  // With only 2 steps and the flag reset at entry, this should complete
+  // normally — the abort callback only fires during denoising, and
+  // cancelRequested_ is false by then.
+  EXPECT_NO_THROW(model->process(std::any(job)));
+  EXPECT_EQ(images.size(), 1u) << "Should produce 1 image";
+}
+
+// ---------------------------------------------------------------------------
+// 6. process() on unloaded model throws (not crash/segfault)
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, ProcessOnUnloadedModelThrows) {
+  SdCtxConfig config{};
+  SdModel unloadedModel(std::move(config));
+
+  std::vector<std::vector<uint8_t>> images;
+  auto job = makeShortJob(images);
+
+  EXPECT_THROW(unloadedModel.process(std::any(job)), std::exception);
+}
+
+// ---------------------------------------------------------------------------
+// 7. Runtime stats are populated after successful generation
+// ---------------------------------------------------------------------------
+TEST_F(SdCancelContextTest, RuntimeStatsPopulatedAfterGeneration) {
+  std::vector<std::vector<uint8_t>> images;
+  auto job = makeShortJob(images);
+
+  model->process(std::any(job));
+
+  auto stats = model->runtimeStats();
+  EXPECT_FALSE(stats.empty()) << "Stats should be populated after generation";
+
+  // Check expected stat keys exist
+  bool hasGenerationMs = false;
+  bool hasTotalImages = false;
+  for (const auto& [key, value] : stats) {
+    if (key == "generationMs") {
+      hasGenerationMs = true;
+    }
+    if (key == "totalImages") {
+      hasTotalImages = true;
+    }
+  }
+  EXPECT_TRUE(hasGenerationMs) << "Stats must include generationMs";
+  EXPECT_TRUE(hasTotalImages) << "Stats must include totalImages";
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_common.hpp b/packages/lib-infer-diffusion/test/unit/test_common.hpp
new file mode 100644
index 0000000000..884ff211ec
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_common.hpp
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>
+#include <string>
+#include <vector>
+
+namespace sd_test_helpers {
+
+inline std::string getTestDevice() {
+  if (std::getenv("SD_CPU_ONLY"))
+    return "cpu";
+#if defined(__APPLE__)
+  return "gpu"; // Metal
+#else
+  return "cpu";
+#endif
+}
+
+inline int getTestThreads() { return 4; }
+
+// Returns the absolute path to the SD2.1 Q8_0 model file.
+// Checks SD_TEST_MODEL_PATH env first, then the default location used by the
+// JS integration test runner (test/model/ relative to the package root, which
+// is CMAKE_SOURCE_DIR and the ctest WORKING_DIRECTORY).
+// Returns an empty string when no model is found so callers can GTEST_SKIP().
+inline std::string getModelPath() {
+  if (const char* p = std::getenv("SD_TEST_MODEL_PATH")) {
+    if (std::filesystem::exists(p))
+      return p;
+  }
+
+#ifdef PROJECT_ROOT
+  const std::string root = PROJECT_ROOT;
+#else
+  const std::string root = ".";
+#endif
+
+  const std::string candidates[] = {
+      root + "/test/model/stable-diffusion-v2-1-Q8_0.gguf",
+      root + "/models/stable-diffusion-v2-1-Q8_0.gguf",
+  };
+  for (const auto& path : candidates) {
+    if (std::filesystem::exists(path))
+      return path;
+  }
+
+  return {};
+}
+
+inline bool isPng(const std::vector<uint8_t>& buf) {
+  if (buf.size() < 8)
+    return false;
+  return buf[0] == 0x89 && buf[1] == 0x50 && buf[2] == 0x4E && buf[3] == 0x47 &&
+         buf[4] == 0x0D && buf[5] == 0x0A && buf[6] == 0x1A && buf[7] == 0x0A;
+}
+
+} // namespace sd_test_helpers
diff --git a/packages/lib-infer-diffusion/test/unit/test_full_generation.cpp b/packages/lib-infer-diffusion/test/unit/test_full_generation.cpp
new file mode 100644
index 0000000000..35e1e719e6
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_full_generation.cpp
@@ -0,0 +1,127 @@
+#include <any>
+#include <cstdint>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "model-interface/SdModel.hpp"
+#include "test_common.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+
+// ---------------------------------------------------------------------------
+// Mirrors the JS integration test in generate-image.test.js:
+//   model  : stable-diffusion-v2-1-Q8_0.gguf
+//   prompt : "a red fox in a snowy forest, photorealistic"
+//   neg    : "blurry, low quality, watermark"
+//   steps  : 10       cfg_scale : 7.5
+//   width  : 712      height    : 712
+//   seed   : 42       prediction: v (SD2.1 v-prediction)
+// ---------------------------------------------------------------------------
+
+class SdFullGenerationTest : public ::testing::Test {
+protected:
+  static std::unique_ptr<SdModel> model;
+
+  static void SetUpTestSuite() {
+    const auto path = sd_test_helpers::getModelPath();
+    if (path.empty())
+      return;
+
+    SdCtxConfig config{};
+    config.modelPath = path;
+    config.prediction = V_PRED;
+    config.nThreads = sd_test_helpers::getTestThreads();
+    config.device = sd_test_helpers::getTestDevice();
+
+    model = std::make_unique<SdModel>(std::move(config));
+    model->load();
+  }
+
+  static void TearDownTestSuite() {
+    model.reset(); // destructor releases GPU memory
+  }
+
+  void SetUp() override {
+    if (!model)
+      GTEST_SKIP() << "SD2.1 model not available — set SD_TEST_MODEL_PATH or "
+                      "download to test/model/";
+  }
+};
+
+std::unique_ptr<SdModel> SdFullGenerationTest::model = nullptr;
+
+TEST_F(SdFullGenerationTest, Txt2ImgMatchesIntegrationConfig) {
+  std::vector<std::vector<uint8_t>> images;
+  std::vector<std::string> progressTicks;
+  std::mutex mu;
+
+  SdModel::GenerationJob job;
+  job.paramsJson = R"({
+    "prompt": "a red fox in a snowy forest, photorealistic",
+    "negative_prompt": "blurry, low quality, watermark",
+    "steps": 10,
+    "width": 712,
+    "height": 712,
+    "cfg_scale": 7.5,
+    "seed": 42
+  })";
+
+  job.progressCallback = [&](const std::string& json) {
+    std::lock_guard<std::mutex> lk(mu);
+    progressTicks.push_back(json);
+    std::cout << "\r  " << json << std::flush;
+  };
+
+  job.outputCallback = [&](const std::vector<uint8_t>& png) {
+    std::lock_guard<std::mutex> lk(mu);
+    images.push_back(png);
+    std::cout << "\n  Output: " << png.size() << " bytes" << std::endl;
+  };
+
+  EXPECT_NO_THROW(model->process(std::any(job)));
+
+  // -- Image assertions (same checks as the JS test) -------------------------
+  ASSERT_EQ(images.size(), 1u) << "Expected exactly 1 output image";
+
+  const auto& img = images[0];
+  EXPECT_GT(img.size(), 0u) << "Image must be non-empty";
+  EXPECT_TRUE(sd_test_helpers::isPng(img))
+      << "Image must have valid PNG magic bytes";
+
+  // -- Progress assertions (fatal: back() below needs a non-empty vector) -----
+  ASSERT_GT(progressTicks.size(), 0u)
+      << "Must receive at least 1 progress tick";
+
+  // The last tick should report total == 10 (the configured step count).
+  // Progress JSON shape: {"step":N,"total":M,"elapsed_ms":T}
+  const auto& lastTick = progressTicks.back();
+  EXPECT_NE(lastTick.find("\"total\":10"), std::string::npos)
+      << "Final progress tick must report total=10, got: " << lastTick;
+
+  // -- Save output to output/ -------------------------------------------------
+#ifdef PROJECT_ROOT
+  const std::string outDir = std::string(PROJECT_ROOT) + "/output";
+#else
+  const std::string outDir = "output";
+#endif
+  std::filesystem::create_directories(outDir);
+  const std::string outPath = outDir + "/cpp-sd2-txt2img-seed42.png";
+  std::ofstream ofs(outPath, std::ios::binary);
+  ofs.write(
+      reinterpret_cast<const char*>(img.data()),
+      static_cast<std::streamsize>(img.size()));
+  ofs.close();
+  std::cout << "  Saved → " << outPath << std::endl;
+
+  // -- Runtime stats ----------------------------------------------------------
+  const auto stats = model->runtimeStats();
+  EXPECT_FALSE(stats.empty())
+      << "runtimeStats() should be populated after generation";
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_model_loading.cpp b/packages/lib-infer-diffusion/test/unit/test_model_loading.cpp
new file mode 100644
index 0000000000..0b7d60884d
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_model_loading.cpp
@@ -0,0 +1,49 @@
+#include <memory>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "model-interface/SdModel.hpp"
+#include "test_common.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+
+class SdModelLoadingTest : public ::testing::Test {
+protected:
+  std::string modelPath;
+
+  void SetUp() override {
+    modelPath = sd_test_helpers::getModelPath();
+    if (modelPath.empty())
+      GTEST_SKIP() << "SD2.1 model not available — set SD_TEST_MODEL_PATH or "
+                      "download to test/model/";
+  }
+
+  std::unique_ptr<SdModel> makeModel() {
+    SdCtxConfig config{};
+    config.modelPath = modelPath;
+    config.prediction = V_PRED;
+    config.nThreads = sd_test_helpers::getTestThreads();
+    config.device = sd_test_helpers::getTestDevice();
+    return std::make_unique<SdModel>(std::move(config));
+  }
+};
+
+TEST_F(SdModelLoadingTest, LoadSD2ModelSucceeds) {
+  auto model = makeModel();
+  ASSERT_FALSE(model->isLoaded());
+
+  EXPECT_NO_THROW(model->load());
+  EXPECT_TRUE(model->isLoaded());
+  // model goes out of scope here — destructor releases GPU memory
+}
+
+TEST_F(SdModelLoadingTest, DestructorReleasesResources) {
+  // Verify that a loaded model is reported as loaded, then let the destructor
+  // free resources by going out of scope.
+  {
+    auto model = makeModel();
+    model->load();
+    ASSERT_TRUE(model->isLoaded());
+  } // destructor called here — must not crash
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_sd_gen_handlers.cpp b/packages/lib-infer-diffusion/test/unit/test_sd_gen_handlers.cpp
new file mode 100644
index 0000000000..7105c64d6e
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_sd_gen_handlers.cpp
@@ -0,0 +1,427 @@
+/**
+ * Unit tests for SdGenHandlers parsers, SdImageBatch RAII, and SdModel
+ * lifecycle.
+ *
+ * Coverage:
+ *   1.  parseSampler   – unordered_map refactor (valid / unknown)
+ *   2.  parseScheduler – unordered_map refactor (valid / unknown)
+ *   3.  parseCacheMode – unordered_map refactor (valid / "" / unknown)
+ *   4.  cache_preset   – pair<mode,threshold> map (all 4 presets / unknown)
+ *   5.  parseVaeTileSize – C++20 from_chars + string_view
+ *                         (int / "WxH" / bad format / wrong type)
+ *   6.  SdImageBatch   – RAII wrapper: pixel buffers freed on scope exit,
+ *                        release(i) for early per-image free, and
+ *                        exception safety during iteration
+ *   7.  IModelAsyncLoad removed – SdModel no longer implements it
+ */
+
+#include <cstdlib>
+#include <stdexcept>
+
+#include <gtest/gtest.h>
+#include <picojson/picojson.h>
+#include <stable-diffusion.h>
+
+#include "handlers/SdGenHandlers.hpp"
+#include "model-interface/SdModel.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+using namespace qvac_errors;
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Helpers
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Build a one-key picojson::object so we can exercise individual handlers.
+static picojson::object
+makeObj(const std::string& key, const picojson::value& val) {
+  picojson::object obj;
+  obj[key] = val;
+  return obj;
+}
+
+static picojson::value str(const std::string& s) { return picojson::value(s); }
+
+static picojson::value num(double n) { return picojson::value(n); }
+
+static picojson::value boolean(bool b) { return picojson::value(b); }
+
+// Apply a single handler by name and return the resulting config.
+static SdGenConfig
+applyOne(const std::string& key, const picojson::value& val) {
+  SdGenConfig cfg;
+  applySdGenHandlers(cfg, makeObj(key, val));
+  return cfg;
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 1. parseSampler
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdGenHandlers_Sampler, EulerMapsCorrectly) {
+  auto cfg = applyOne("sampling_method", str("euler"));
+  EXPECT_EQ(cfg.sampleMethod, EULER_SAMPLE_METHOD);
+}
+
+TEST(SdGenHandlers_Sampler, EulerAMapsCorrectly) {
+  auto cfg = applyOne("sampling_method", str("euler_a"));
+  EXPECT_EQ(cfg.sampleMethod, EULER_A_SAMPLE_METHOD);
+}
+
+TEST(SdGenHandlers_Sampler, HeunMapsCorrectly) {
+  auto cfg = applyOne("sampler", str("heun"));
+  EXPECT_EQ(cfg.sampleMethod, HEUN_SAMPLE_METHOD);
+}
+
+TEST(SdGenHandlers_Sampler, AllSamplersAccepted) {
+  const std::vector<std::pair<std::string, sample_method_t>> cases{
+      {"euler", EULER_SAMPLE_METHOD},
+      {"euler_a", EULER_A_SAMPLE_METHOD},
+      {"heun", HEUN_SAMPLE_METHOD},
+      {"dpm2", DPM2_SAMPLE_METHOD},
+      {"dpm++2m", DPMPP2M_SAMPLE_METHOD},
+      {"dpm++2mv2", DPMPP2Mv2_SAMPLE_METHOD},
+      {"dpm++2s_a", DPMPP2S_A_SAMPLE_METHOD},
+      {"lcm", LCM_SAMPLE_METHOD},
+      {"ipndm", IPNDM_SAMPLE_METHOD},
+      {"ipndm_v", IPNDM_V_SAMPLE_METHOD},
+      {"ddim_trailing", DDIM_TRAILING_SAMPLE_METHOD},
+      {"tcd", TCD_SAMPLE_METHOD},
+      {"res_multistep", RES_MULTISTEP_SAMPLE_METHOD},
+      {"res_2s", RES_2S_SAMPLE_METHOD},
+  };
+  for (const auto& [name, expected] : cases) {
+    SdGenConfig cfg;
+    EXPECT_NO_THROW(
+        applySdGenHandlers(cfg, makeObj("sampling_method", str(name))))
+        << "sampler: " << name;
+    EXPECT_EQ(cfg.sampleMethod, expected) << "sampler: " << name;
+  }
+}
+
+TEST(SdGenHandlers_Sampler, UnknownSamplerThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("sampling_method", str("bogus_sampler"))),
+      StatusError);
+}
+
+TEST(SdGenHandlers_Sampler, BothAliasesRouteToSameField) {
+  auto cfgA = applyOne("sampling_method", str("euler"));
+  auto cfgB = applyOne("sampler", str("euler"));
+  EXPECT_EQ(cfgA.sampleMethod, cfgB.sampleMethod);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 2. parseScheduler
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdGenHandlers_Scheduler, AllSchedulersAccepted) {
+  const std::vector<std::pair<std::string, scheduler_t>> cases{
+      {"discrete", DISCRETE_SCHEDULER},
+      {"karras", KARRAS_SCHEDULER},
+      {"exponential", EXPONENTIAL_SCHEDULER},
+      {"ays", AYS_SCHEDULER},
+      {"gits", GITS_SCHEDULER},
+      {"sgm_uniform", SGM_UNIFORM_SCHEDULER},
+      {"simple", SIMPLE_SCHEDULER},
+      {"lcm", LCM_SCHEDULER},
+      {"smoothstep", SMOOTHSTEP_SCHEDULER},
+      {"kl_optimal", KL_OPTIMAL_SCHEDULER},
+      {"bong_tangent", BONG_TANGENT_SCHEDULER},
+  };
+  for (const auto& [name, expected] : cases) {
+    SdGenConfig cfg;
+    EXPECT_NO_THROW(applySdGenHandlers(cfg, makeObj("scheduler", str(name))))
+        << "scheduler: " << name;
+    EXPECT_EQ(cfg.scheduler, expected) << "scheduler: " << name;
+  }
+}
+
+TEST(SdGenHandlers_Scheduler, UnknownSchedulerThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("scheduler", str("no_such_scheduler"))),
+      StatusError);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 3. parseCacheMode
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdGenHandlers_CacheMode, DisabledStringMapsToDisabled) {
+  auto cfg = applyOne("cache_mode", str("disabled"));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_DISABLED);
+}
+
+TEST(SdGenHandlers_CacheMode, EmptyStringMapsToDisabled) {
+  // Both "" and "disabled" are accepted aliases for SD_CACHE_DISABLED.
+  auto cfg = applyOne("cache_mode", str(""));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_DISABLED);
+}
+
+TEST(SdGenHandlers_CacheMode, AllCacheModesAccepted) {
+  const std::vector<std::pair<std::string, sd_cache_mode_t>> cases{
+      {"", SD_CACHE_DISABLED},
+      {"disabled", SD_CACHE_DISABLED},
+      {"easycache", SD_CACHE_EASYCACHE},
+      {"ucache", SD_CACHE_UCACHE},
+      {"dbcache", SD_CACHE_DBCACHE},
+      {"taylorseer", SD_CACHE_TAYLORSEER},
+      {"cache-dit", SD_CACHE_CACHE_DIT},
+  };
+  for (const auto& [name, expected] : cases) {
+    SdGenConfig cfg;
+    EXPECT_NO_THROW(applySdGenHandlers(cfg, makeObj("cache_mode", str(name))))
+        << "cache_mode: '" << name << "'";
+    EXPECT_EQ(cfg.cacheMode, expected) << "cache_mode: '" << name << "'";
+  }
+}
+
+TEST(SdGenHandlers_CacheMode, UnknownCacheModeThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("cache_mode", str("quantum_cache"))),
+      StatusError);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 4. cache_preset handler
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdGenHandlers_CachePreset, SlowPresetSetsModeAndThreshold) {
+  auto cfg = applyOne("cache_preset", str("slow"));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_EASYCACHE);
+  EXPECT_FLOAT_EQ(cfg.cacheThreshold, 0.60f);
+}
+
+TEST(SdGenHandlers_CachePreset, MediumPreset) {
+  auto cfg = applyOne("cache_preset", str("medium"));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_EASYCACHE);
+  EXPECT_FLOAT_EQ(cfg.cacheThreshold, 0.40f);
+}
+
+TEST(SdGenHandlers_CachePreset, FastPreset) {
+  auto cfg = applyOne("cache_preset", str("fast"));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_EASYCACHE);
+  EXPECT_FLOAT_EQ(cfg.cacheThreshold, 0.25f);
+}
+
+TEST(SdGenHandlers_CachePreset, UltraPreset) {
+  auto cfg = applyOne("cache_preset", str("ultra"));
+  EXPECT_EQ(cfg.cacheMode, SD_CACHE_EASYCACHE);
+  EXPECT_FLOAT_EQ(cfg.cacheThreshold, 0.15f);
+}
+
+TEST(SdGenHandlers_CachePreset, PresetsOrderedByThreshold) {
+  // Sanity check: slow > medium > fast > ultra (higher threshold =
+  // safer/slower)
+  auto slow = applyOne("cache_preset", str("slow"));
+  auto medium = applyOne("cache_preset", str("medium"));
+  auto fast = applyOne("cache_preset", str("fast"));
+  auto ultra = applyOne("cache_preset", str("ultra"));
+  EXPECT_GT(slow.cacheThreshold, medium.cacheThreshold);
+  EXPECT_GT(medium.cacheThreshold, fast.cacheThreshold);
+  EXPECT_GT(fast.cacheThreshold, ultra.cacheThreshold);
+}
+
+TEST(SdGenHandlers_CachePreset, UnknownPresetThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("cache_preset", str("turbo"))),
+      StatusError);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 5. parseVaeTileSize
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdGenHandlers_VaeTileSize, IntegerAppliesToBothAxes) {
+  auto cfg = applyOne("vae_tile_size", num(256.0));
+  EXPECT_EQ(cfg.vaeTileSizeX, 256);
+  EXPECT_EQ(cfg.vaeTileSizeY, 256);
+}
+
+TEST(SdGenHandlers_VaeTileSize, WxHStringSetsAxesIndependently) {
+  auto cfg = applyOne("vae_tile_size", str("128x64"));
+  EXPECT_EQ(cfg.vaeTileSizeX, 128);
+  EXPECT_EQ(cfg.vaeTileSizeY, 64);
+}
+
+TEST(SdGenHandlers_VaeTileSize, SquareWxHString) {
+  auto cfg = applyOne("vae_tile_size", str("512x512"));
+  EXPECT_EQ(cfg.vaeTileSizeX, 512);
+  EXPECT_EQ(cfg.vaeTileSizeY, 512);
+}
+
+TEST(SdGenHandlers_VaeTileSize, StringWithoutXSeparatorThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("vae_tile_size", str("256"))),
+      StatusError);
+}
+
+TEST(SdGenHandlers_VaeTileSize, NonNumericWxHThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("vae_tile_size", str("abcxdef"))),
+      StatusError);
+}
+
+TEST(SdGenHandlers_VaeTileSize, WrongTypeThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("vae_tile_size", boolean(true))),
+      StatusError);
+}
+
+TEST(SdGenHandlers_VaeTileSize, MissingRhsThrows) {
+  SdGenConfig cfg;
+  EXPECT_THROW(
+      applySdGenHandlers(cfg, makeObj("vae_tile_size", str("128x"))),
+      StatusError);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 6. SdImageBatch – RAII memory management
+//
+// SdImageBatch lives in the anonymous namespace of SdModel.cpp so it cannot
+// be instantiated directly in a test.  We mirror the class here to validate
+// the RAII contract in isolation; the same design is exercised end-to-end by
+// the full-generation integration tests.
+// ─────────────────────────────────────────────────────────────────────────────
+
+namespace {
+
+// Minimal replica of the SdImageBatch RAII class used in SdModel.cpp.
+class SdImageBatchTest {
+public:
+  SdImageBatchTest(sd_image_t* data, int count) : data_(data), count_(count) {}
+  ~SdImageBatchTest() {
+    for (int i = 0; i < count_; ++i) {
+      free(data_[i].data); // nullptr-safe
+    }
+    free(data_);
+  }
+
+  SdImageBatchTest(const SdImageBatchTest&) = delete;
+  SdImageBatchTest& operator=(const SdImageBatchTest&) = delete;
+  SdImageBatchTest(SdImageBatchTest&&) = delete;
+  SdImageBatchTest& operator=(SdImageBatchTest&&) = delete;
+
+  [[nodiscard]] int count() const { return count_; }
+  [[nodiscard]] const sd_image_t& operator[](int i) const { return data_[i]; }
+  void release(int i) {
+    free(data_[i].data);
+    data_[i].data = nullptr;
+  }
+
+private:
+  sd_image_t* const data_;
+  const int count_;
+};
+
+// Build a heap-allocated sd_image_t array with real malloc'd pixel buffers
+// so that ASan / valgrind can detect any missing free().
+static sd_image_t* makeFakeImages(int count, int pixelBytes = 4) {
+  auto* arr = static_cast<sd_image_t*>(malloc(sizeof(sd_image_t) * count));
+  for (int i = 0; i < count; ++i) {
+    arr[i].width = 1;
+    arr[i].height = 1;
+    arr[i].channel = pixelBytes;
+    arr[i].data = static_cast<uint8_t*>(malloc(pixelBytes));
+  }
+  return arr;
+}
+
+} // anonymous namespace
+
+TEST(SdImageBatch, DestructorFreesAllBuffersOnNormalExit) {
+  // ASAN will catch a double-free or leak if our destructor is wrong.
+  {
+    SdImageBatchTest batch(makeFakeImages(3), 3);
+    // images iterated but NOT released manually — destructor must clean up.
+    for (int i = 0; i < batch.count(); ++i) {
+      EXPECT_NE(batch[i].data, nullptr);
+    }
+  } // destructor fires here
+}
+
+TEST(SdImageBatch, ReleaseNullsPointerSoDestructorSkipsIt) {
+  SdImageBatchTest batch(makeFakeImages(2), 2);
+  batch.release(0);                  // free pixel buf for image 0 early
+  EXPECT_EQ(batch[0].data, nullptr); // release() sets data to nullptr
+  EXPECT_NE(batch[1].data, nullptr); // image 1 still valid
+  // destructor calls free(nullptr) for slot 0 (no-op) and frees slot 1
+}
+
+TEST(SdImageBatch, DestructorFiresEvenWhenExceptionThrown) {
+  bool destructorRan = false;
+
+  // Wrap batch in a scope that throws to simulate encodeToPng/outputCallback
+  // throwing mid-iteration.  Without RAII this would leak all pixel buffers.
+  struct Guard {
+    bool& ran;
+    ~Guard() { ran = true; }
+  };
+
+  try {
+    SdImageBatchTest batch(makeFakeImages(3), 3);
+    Guard g{destructorRan};
+    (void)g; // suppress unused warning
+    throw std::runtime_error("simulated callback failure");
+  } catch (const std::runtime_error&) {
+  }
+
+  EXPECT_TRUE(destructorRan);
+  // If SdImageBatchTest destructor did NOT run, ASan would report leaks above.
+}
+
+TEST(SdImageBatch, EarlyReleaseAllowsImmediateMemoryRecovery) {
+  // Simulates the production loop: encode → release → next image
+  sd_image_t* arr = makeFakeImages(4);
+  SdImageBatchTest batch(arr, 4);
+
+  for (int i = 0; i < batch.count(); ++i) {
+    // "process" image i
+    EXPECT_NE(batch[i].data, nullptr);
+    batch.release(i); // pixel buffer freed immediately
+    EXPECT_EQ(batch[i].data, nullptr);
+  }
+  // destructor: all data_[i].data are nullptr → only the array itself is freed
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 7. IModelAsyncLoad removed – SdModel must NOT implement that interface
+// ─────────────────────────────────────────────────────────────────────────────
+
+TEST(SdModel_NoAsyncLoad, SdModelDoesNotImplementIModelAsyncLoad) {
+  SdCtxConfig cfg{};
+  SdModel model(std::move(cfg));
+
+  const auto* asyncLoad =
+      dynamic_cast<qvac_lib_inference_addon_cpp::model::IModelAsyncLoad*>(
+          &model);
+
+  EXPECT_EQ(asyncLoad, nullptr)
+      << "SdModel should not implement IModelAsyncLoad — it uses a custom "
+         "activate() in AddonJs.hpp that calls load() directly instead";
+}
+
+TEST(SdModel_NoAsyncLoad, SdModelStillImplementsIModel) {
+  SdCtxConfig cfg{};
+  SdModel model(std::move(cfg));
+
+  const auto* imodel =
+      dynamic_cast<qvac_lib_inference_addon_cpp::model::IModel*>(&model);
+  EXPECT_NE(imodel, nullptr);
+}
+
+TEST(SdModel_NoAsyncLoad, SdModelStillImplementsIModelCancel) {
+  SdCtxConfig cfg{};
+  SdModel model(std::move(cfg));
+
+  const auto* icancel =
+      dynamic_cast<qvac_lib_inference_addon_cpp::model::IModelCancel*>(&model);
+  EXPECT_NE(icancel, nullptr);
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_sd_model.cpp b/packages/lib-infer-diffusion/test/unit/test_sd_model.cpp
new file mode 100644
index 0000000000..6b60ea58b1
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_sd_model.cpp
@@ -0,0 +1,31 @@
+#include <gtest/gtest.h>
+
+#include "model-interface/SdModel.hpp"
+#include "test_common.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+
+class SdModelTest : public ::testing::Test {}; // empty fixture: no state, no hooks
+
+TEST_F(SdModelTest, ConstructWithEmptyConfigDoesNotThrow) {
+  SdCtxConfig emptyConfig{}; // value-initialized; no field is set explicitly
+  EXPECT_NO_THROW({ SdModel sdModel(std::move(emptyConfig)); });
+}
+
+TEST_F(SdModelTest, IsNotLoadedAfterConstruction) {
+  SdCtxConfig emptyConfig{};
+  SdModel sdModel(std::move(emptyConfig)); // constructed only; load() not called
+  EXPECT_FALSE(sdModel.isLoaded());
+}
+
+TEST_F(SdModelTest, GetNameReturnsSdModel) {
+  SdModel sdModel(SdCtxConfig{}); // default config, never loaded
+  const auto reportedName = sdModel.getName();
+  EXPECT_EQ(reportedName, "SdModel");
+}
+
+TEST_F(SdModelTest, DestroyUnloadedModelIsNoop) {
+  // Scoped so the never-loaded model is destroyed inside the checked block.
+  SdCtxConfig emptyConfig{};
+  EXPECT_NO_THROW({ SdModel shortLived(std::move(emptyConfig)); });
+}
diff --git a/packages/lib-infer-diffusion/test/unit/test_single_step_inference.cpp b/packages/lib-infer-diffusion/test/unit/test_single_step_inference.cpp
new file mode 100644
index 0000000000..a3cfb6a3e1
--- /dev/null
+++ b/packages/lib-infer-diffusion/test/unit/test_single_step_inference.cpp
@@ -0,0 +1,78 @@
+#include <any>
+#include <atomic>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "model-interface/SdModel.hpp"
+#include "test_common.hpp"
+
+using namespace qvac_lib_inference_addon_sd;
+
+// Suite-level fixture: one SdModel is created and loaded for the whole suite,
+// and each test is skipped when no model path was found.
+class SdSingleStepInferenceTest : public ::testing::Test {
+protected:
+  static std::unique_ptr<SdModel> model;
+
+  static void SetUpTestSuite() {
+    const auto modelPath = sd_test_helpers::getModelPath();
+    if (!modelPath.empty()) {
+      SdCtxConfig ctxConfig{};
+      ctxConfig.modelPath = modelPath;
+      ctxConfig.prediction = V_PRED;
+      ctxConfig.nThreads = sd_test_helpers::getTestThreads();
+      ctxConfig.device = sd_test_helpers::getTestDevice();
+      model = std::make_unique<SdModel>(std::move(ctxConfig));
+      model->load();
+    }
+  }
+
+  // Drops the shared model once the last test of the suite has run.
+  static void TearDownTestSuite() { model.reset(); }
+
+  void SetUp() override {
+    if (model == nullptr) {
+      GTEST_SKIP() << "SD2.1 model not available — set SD_TEST_MODEL_PATH or "
+                      "download to test/model/";
+    }
+  }
+};
+
+std::unique_ptr<SdModel> SdSingleStepInferenceTest::model{};
+
+TEST_F(SdSingleStepInferenceTest, SingleStepProducesValidPng) {
+  // Runs one 512x512 job with a single step and inspects what it emits.
+  std::atomic<int> progressTicks{0};
+  std::vector<std::vector<uint8_t>> pngOutputs;
+
+  SdModel::GenerationJob request;
+  request.paramsJson = R"({
+    "prompt": "solid white background",
+    "negative_prompt": "",
+    "steps": 1,
+    "width": 512,
+    "height": 512,
+    "cfg_scale": 7.0,
+    "seed": 1
+  })";
+  request.progressCallback = [&progressTicks](const std::string& progressJson) {
+    ++progressTicks;
+    std::cout << "\r  " << progressJson << std::flush;
+  };
+  request.outputCallback = [&pngOutputs](const std::vector<uint8_t>& pngBytes) {
+    pngOutputs.push_back(pngBytes);
+    std::cout << "\n  Output: " << pngBytes.size() << " bytes" << std::endl;
+  };
+
+  EXPECT_NO_THROW(model->process(std::any(request)));
+
+  ASSERT_EQ(pngOutputs.size(), 1u) << "Expected exactly 1 output image";
+  const auto& firstPng = pngOutputs.front();
+  EXPECT_GT(firstPng.size(), 0u) << "Image must be non-empty";
+  EXPECT_TRUE(sd_test_helpers::isPng(firstPng)) << "Output must be valid PNG";
+  EXPECT_GE(progressTicks.load(), 1) << "At least 1 progress tick expected";
+}
diff --git a/packages/lib-infer-diffusion/tsconfig.dts.json b/packages/lib-infer-diffusion/tsconfig.dts.json
new file mode 100644
index 0000000000..434a16acea
--- /dev/null
+++ b/packages/lib-infer-diffusion/tsconfig.dts.json
@@ -0,0 +1,17 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "lib": ["ES2022"],
+    "types": ["node"],
+    "skipLibCheck": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "verbatimModuleSyntax": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": true,
+    "noEmit": true
+  },
+  "include": ["index.d.ts", "addonLogging.d.ts"]
+}
diff --git a/packages/lib-infer-diffusion/vcpkg-configuration.json b/packages/lib-infer-diffusion/vcpkg-configuration.json
new file mode 100644
index 0000000000..eb15cebb17
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg-configuration.json
@@ -0,0 +1,22 @@
+{
+  "overlay-ports": [
+    "vcpkg/ports"
+  ],
+  "default-registry": {
+    "kind": "git",
+    "baseline": "a9eae49a7c95a63755c0b44ecf9d74b0a00e181b",
+    "repository": "git@github.com:tetherto/qvac-registry-vcpkg.git"
+  },
+  "registries": [
+    {
+      "kind": "git",
+      "baseline": "16c71a39e5a0fc0bdb3fad03beef8f38ee00ee3b",
+      "repository": "https://github.com/microsoft/vcpkg",
+      "packages": [
+        "gtest",
+        "picojson",
+        "stb"
+      ]
+    }
+  ]
+}
diff --git a/packages/lib-infer-diffusion/vcpkg.json b/packages/lib-infer-diffusion/vcpkg.json
new file mode 100644
index 0000000000..e7d78dcf46
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg.json
@@ -0,0 +1,74 @@
+{
+  "dependencies": [
+    {
+      "name": "ggml",
+      "version>=": "2026-01-30#1"
+    },
+    "picojson",
+    {
+      "name": "qvac-lib-inference-addon-cpp",
+      "version>=": "1.1.2"
+    },
+    {
+      "name": "qvac-lint-cpp",
+      "version>=": "1.4.4"
+    },
+    {
+      "name": "stable-diffusion-cpp",
+      "version>=": "2026-03-01"
+    },
+    "stb"
+  ],
+  "features": {
+    "cuda": {
+      "description": "Enable CUDA GPU backend",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": [
+            "cuda"
+          ]
+        }
+      ]
+    },
+    "metal": {
+      "description": "Enable Metal GPU backend (macOS/iOS)",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": [
+            "metal"
+          ]
+        }
+      ]
+    },
+    "opencl": {
+      "description": "Enable OpenCL GPU backend (Android/Adreno)",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": [
+            "opencl"
+          ]
+        }
+      ]
+    },
+    "tests": {
+      "description": "Build tests",
+      "dependencies": [
+        "gtest"
+      ]
+    },
+    "vulkan": {
+      "description": "Enable Vulkan GPU backend",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": [
+            "vulkan"
+          ]
+        }
+      ]
+    }
+  }
+}
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/android-vulkan-version.cmake b/packages/lib-infer-diffusion/vcpkg/ports/ggml/android-vulkan-version.cmake
new file mode 100644
index 0000000000..16ac7c0826
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/android-vulkan-version.cmake
@@ -0,0 +1,37 @@
+# detect_ndk_vulkan_version()
+#
+# Reads vulkan_core.h from the sysroot of the NDK at $ENV{ANDROID_NDK_HOME} and
+# sets `vulkan_version` ("<major>.<minor>.<VK_HEADER_VERSION>", e.g. "1.3.275")
+# in the caller's scope.  Every failure is reported with FATAL_ERROR.
+function(detect_ndk_vulkan_version)
+    set(prebuilt_root "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt")
+    string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_os)
+
+    # Toolchain directories are matched as "<host-os>-*"; the first hit is used.
+    file(GLOB prebuilt_dirs LIST_DIRECTORIES true "${prebuilt_root}/${host_os}-*")
+    if(NOT prebuilt_dirs)
+        message(FATAL_ERROR "Could not find NDK host directory for ${host_os}")
+    endif()
+    list(GET prebuilt_dirs 0 first_prebuilt_dir)
+    get_filename_component(host_tag "${first_prebuilt_dir}" NAME)
+
+    set(vulkan_core_h "${prebuilt_root}/${host_tag}/sysroot/usr/include/vulkan/vulkan_core.h")
+    if(NOT EXISTS "${vulkan_core_h}")
+        message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}")
+    endif()
+    file(READ "${vulkan_core_h}" header_text)
+
+    # "VK_HEADER_VERSION <digits>" supplies the third version component.
+    string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" patch_match "${header_text}")
+    if(NOT patch_match)
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}")
+    endif()
+    set(patch "${CMAKE_MATCH_1}")
+
+    # Of the three captured numbers, the 2nd and 3rd are used as major and minor.
+    string(REGEX MATCH "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), ([0-9]+), ([0-9]+)" api_match "${header_text}")
+    if(NOT api_match)
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}")
+    endif()
+    set(vulkan_version "${CMAKE_MATCH_2}.${CMAKE_MATCH_3}.${patch}" PARENT_SCOPE)
+endfunction()
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-config-include-dir.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-config-include-dir.patch
new file mode 100644
index 0000000000..f9cc14a92a
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-config-include-dir.patch
@@ -0,0 +1,20 @@
+--- a/cmake/ggml-config.cmake.in
++++ b/cmake/ggml-config.cmake.in
+@@ -112,7 +112,8 @@
+     add_library(ggml::ggml UNKNOWN IMPORTED)
+     set_target_properties(ggml::ggml
+         PROPERTIES
+-            IMPORTED_LOCATION "${GGML_LIBRARY}")
++            IMPORTED_LOCATION "${GGML_LIBRARY}"
++            INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}")
+ 
+     find_library(GGML_BASE_LIBRARY ggml-base
+         REQUIRED
+@@ -120,6 +121,7 @@
+         NO_CMAKE_FIND_ROOT_PATH)
+ 
+     add_library(ggml::ggml-base UNKNOWN IMPORTED)
++    set_property(TARGET ggml::ggml-base PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}")
+     set_target_properties(ggml::ggml-base
+         PROPERTIES
+             IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-cpu-static-hybrid.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-cpu-static-hybrid.patch
new file mode 100644
index 0000000000..1e202d198d
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-cpu-static-hybrid.patch
@@ -0,0 +1,64 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index b0b8e578..576d6a04 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -177,6 +177,7 @@ option(GGML_XTHEADVECTOR     "ggml: enable xtheadvector"     OFF)
+ option(GGML_VXE              "ggml: enable vxe"              ${GGML_NATIVE})
+ 
+ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
++option(GGML_CPU_STATIC        "ggml: build CPU backend as static library even with GGML_BACKEND_DL"   OFF)
+ set(GGML_CPU_ARM_ARCH        "" CACHE STRING "ggml: CPU architecture for ARM")
+ set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
+ 
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index d577f809..ef3a1308 100644
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -187,6 +187,10 @@ endif()
+ 
+ # GGML_BACKEND_DL works with static core when PIC is enabled below.
+ 
++if (GGML_CPU_STATIC AND GGML_CPU_ALL_VARIANTS)
++    message(FATAL_ERROR "GGML_CPU_STATIC is incompatible with GGML_CPU_ALL_VARIANTS")
++endif()
++
+ add_library(ggml-base
+             ../include/ggml.h
+             ../include/ggml-alloc.h
+@@ -243,7 +247,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ endif()
+ 
+ function(ggml_add_backend_library backend)
+-    if (GGML_BACKEND_DL)
++    if (GGML_BACKEND_DL AND NOT (GGML_CPU_STATIC AND ${backend} MATCHES "^ggml-cpu"))
+         add_library(${backend} MODULE ${ARGN})
+         # write the shared library to the output directory
+         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+@@ -299,6 +303,9 @@ function(ggml_add_backend backend)
+             string(TOUPPER "GGML_USE_${backend}" backend_use)
+             target_compile_definitions(ggml PUBLIC ${backend_use})
+         endif()
++        if (GGML_CPU_STATIC AND "${backend}" STREQUAL "CPU")
++            target_compile_definitions(ggml PUBLIC GGML_USE_CPU)
++        endif()
+     endif()
+ endfunction()
+ 
+diff --git a/cmake/ggml-config.cmake.in b/cmake/ggml-config.cmake.in
+index 91c9d5cd..0c7a92c8 100644
+--- a/cmake/ggml-config.cmake.in
++++ b/cmake/ggml-config.cmake.in
+@@ -128,6 +128,12 @@ if(NOT TARGET ggml::ggml)
+     set(_ggml_all_targets "")
+-    if (NOT GGML_BACKEND_DL)
++    # In hybrid mode (GGML_BACKEND_DL + GGML_CPU_STATIC), only the CPU backend
++    # is static and must still be exported to downstream consumers.
++    if (NOT GGML_BACKEND_DL OR GGML_CPU_STATIC)
+         foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
++            string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
++            if (GGML_BACKEND_DL AND GGML_CPU_STATIC AND NOT is_cpu_variant)
++                continue()
++            endif()
+             string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
+             string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)
+ 
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-max-name.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-max-name.patch
new file mode 100644
index 0000000000..c0a2321a5b
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-max-name.patch
@@ -0,0 +1,31 @@
+diff --git a/cmake/ggml-config.cmake.in b/cmake/ggml-config.cmake.in
+index 91c9d5c..b7d8ec2 100644
+--- a/cmake/ggml-config.cmake.in
++++ b/cmake/ggml-config.cmake.in
+@@ -124,6 +124,11 @@ if(NOT TARGET ggml::ggml)
+         PROPERTIES
+             IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
+ 
++    if(GGML_MAX_NAME)
++        set_property(TARGET ggml::ggml-base APPEND PROPERTY
++            INTERFACE_COMPILE_DEFINITIONS "GGML_MAX_NAME=${GGML_MAX_NAME}")
++    endif()
++
+     set(_ggml_all_targets "")
+     if (NOT GGML_BACKEND_DL)
+         foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index 2650237..d0987d1 100644
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -221,6 +221,10 @@ if (GGML_SCHED_NO_REALLOC)
+     target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC)
+ endif()
+ 
++if (DEFINED GGML_MAX_NAME)
++    target_compile_definitions(ggml-base PUBLIC GGML_MAX_NAME=${GGML_MAX_NAME})
++endif()
++
+ add_library(ggml
+             ggml-backend-dl.cpp
+             ggml-backend-reg.cpp)
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-graceful-no-devices.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-graceful-no-devices.patch
new file mode 100644
index 0000000000..6ea2c76e71
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-graceful-no-devices.patch
@@ -0,0 +1,25 @@
+--- a/src/ggml-opencl/ggml-opencl.cpp
++++ b/src/ggml-opencl/ggml-opencl.cpp
+@@ -3467,6 +3467,9 @@
+ 
+ ggml_backend_t ggml_backend_opencl_init(void) {
+     ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_opencl_reg(), 0);
++    if (!dev) {
++        return nullptr;
++    }
+     ggml_backend_opencl_context *backend_ctx = ggml_cl2_init(dev);
+ 
+     ggml_backend_t backend = new ggml_backend {
+@@ -4722,7 +4725,11 @@
+ }
+ 
+ static ggml_backend_dev_t ggml_backend_opencl_reg_device_get(ggml_backend_reg_t reg, size_t index) {
+-    GGML_ASSERT(index < ggml_backend_opencl_reg_device_count(reg));
++    size_t n = ggml_backend_opencl_reg_device_count(reg);
++    if (n == 0) {
++        return nullptr;
++    }
++    GGML_ASSERT(index < n);
+ 
+     return &g_ggml_backend_opencl_devices[index];
+ 
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-public-header.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-public-header.patch
new file mode 100644
index 0000000000..3e95fdbb9a
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-opencl-public-header.patch
@@ -0,0 +1,11 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -319,6 +319,7 @@
+     include/ggml-cpp.h
+     include/ggml-cuda.h
+     include/ggml-opt.h
++    include/ggml-opencl.h
+     include/ggml-metal.h
+     include/ggml-rpc.h
+     include/ggml-virtgpu.h
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-qvac-backend-prefix.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-qvac-backend-prefix.patch
new file mode 100644
index 0000000000..17745ab3e6
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-qvac-backend-prefix.patch
@@ -0,0 +1,45 @@
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index 4eda64b5..fa379062 100644
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -254,7 +254,9 @@ function(ggml_add_backend_library backend)
+     if (GGML_BACKEND_DL AND NOT (GGML_CPU_STATIC AND ${backend} MATCHES "^ggml-cpu"))
+         add_library(${backend} MODULE ${ARGN})
+         # write the shared library to the output directory
+-        set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
++        set_target_properties(${backend} PROPERTIES
++            LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
++            OUTPUT_NAME "qvac-diffusion-${backend}")
+         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
+         add_dependencies(ggml ${backend})
+         if (GGML_BACKEND_DIR)
+diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp
+index 8a693f84..a514206d 100644
+--- a/src/ggml-backend-reg.cpp
++++ b/src/ggml-backend-reg.cpp
+@@ -437,9 +437,9 @@ static fs::path get_executable_path() {
+ 
+ static fs::path backend_filename_prefix() {
+ #ifdef _WIN32
+-    return fs::u8path("ggml-");
++    return fs::u8path("qvac-diffusion-ggml-");
+ #else
+-    return fs::u8path("libggml-");
++    return fs::u8path("libqvac-diffusion-ggml-");
+ #endif
+ }
+ 
+@@ -526,6 +526,13 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
+                 }
+             }
+         }
++        // Android app packaging can flatten native libraries into one directory.
++        // If loading from the requested subdirectory fails, retry by filename only
++        // and leave lookup to dlopen's default search path resolution.
++        fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
++        if (auto reg = get_reg().load_backend(filename, silent)) {
++            return reg;
++        }
+         return nullptr;
+     }
+ 
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-static-core-dl-backends.patch b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-static-core-dl-backends.patch
new file mode 100644
index 0000000000..edce48bbc5
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/ggml-static-core-dl-backends.patch
@@ -0,0 +1,31 @@
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -185,9 +185,7 @@
+ 
+ # ggml
+ 
+-if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
+-    message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
+-endif()
++# GGML_BACKEND_DL works with static core when PIC is enabled below.
+ 
+ add_library(ggml-base
+             ../include/ggml.h
+@@ -269,7 +267,7 @@
+     target_link_libraries(${backend} PRIVATE ggml-base)
+     target_include_directories(${backend} PRIVATE ..)
+ 
+-    if (${BUILD_SHARED_LIBS})
++    if (BUILD_SHARED_LIBS OR GGML_BACKEND_DL)
+         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
+         target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED)
+     endif()
+@@ -487,7 +485,7 @@
+     target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
+ endif()
+ 
+-if (BUILD_SHARED_LIBS)
++if (BUILD_SHARED_LIBS OR GGML_BACKEND_DL)
+     foreach (target ggml-base ggml)
+         set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+         target_compile_definitions(${target} PRIVATE GGML_BUILD)
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/portfile.cmake b/packages/lib-infer-diffusion/vcpkg/ports/ggml/portfile.cmake
new file mode 100644
index 0000000000..64af3bfb6f
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/portfile.cmake
@@ -0,0 +1,172 @@
+# ggml vcpkg overlay port
+#
+# Builds the ggml tensor library from ggml-org/ggml.
+# Pinned to the commit used by stable-diffusion.cpp tag master-514-5792c66.
+#
+# Installed artefacts:
+#   include/ggml.h  (+ other ggml public headers)
+#   lib/libggml.a, lib/libggml-base.a, lib/libggml-cpu.a, …
+#   lib/cmake/ggml/  (CMake package config)
+#
+# GPU backend selection via vcpkg features:
+#   metal  -> GGML_METAL=ON  (macOS/iOS, default-feature on Apple platforms)
+#   vulkan -> GGML_VULKAN=ON
+#   cuda   -> GGML_CUDA=ON
+#   opencl -> GGML_OPENCL=ON
+
+vcpkg_from_github(
+    OUT_SOURCE_PATH SOURCE_PATH
+    REPO ggml-org/ggml
+    REF a8db410a252c8c8f2d120c6f2e7133ebe032f35d
+    SHA512 bbca42948d994a33f1d6b2a65b254606f8b563d84d2b456db161cf55b6e01ed6b5eae7ad2c878bc4f03afc664c4209c2e87438fd4171a6f7c77dd907706e51bf
+    HEAD_REF master
+    PATCHES  # keep this order: later patches use lines added by earlier ones as context
+        ggml-max-name.patch
+        ggml-opencl-public-header.patch
+        ggml-opencl-graceful-no-devices.patch
+        ggml-config-include-dir.patch
+        ggml-static-core-dl-backends.patch  # removes the GGML_BACKEND_DL / BUILD_SHARED_LIBS check
+        ggml-cpu-static-hybrid.patch        # its context contains a line added by the patch above
+        ggml-qvac-backend-prefix.patch      # its context contains a line added by the patch above
+)
+
+# --- GPU feature flags ---
+# GGML_METAL / GGML_VULKAN / GGML_CUDA / GGML_OPENCL are each ON exactly when
+# the vcpkg feature of the same (lower-case) name was requested, OFF otherwise.
+foreach(_gpu_feature IN ITEMS metal vulkan cuda opencl)
+    string(TOUPPER "GGML_${_gpu_feature}" _ggml_switch)
+    if("${_gpu_feature}" IN_LIST FEATURES)
+        set(${_ggml_switch} ON)
+    else()
+        set(${_ggml_switch} OFF)
+    endif()
+endforeach()
+unset(_ggml_switch)
+
+# --- CUDA compiler ---
+# Stays empty unless the cuda feature is on; otherwise it carries the
+# -DCMAKE_CUDA_COMPILER=<nvcc> argument that is passed to the configure step.
+set(GGML_CUDA_COMPILER_OPTION "")
+
+if(GGML_CUDA)
+    # Probe the fixed install prefixes first: /usr/local/cuda/bin may not be on
+    # the PATH that vcpkg's isolated cmake process inherits.
+    find_program(NVCC_EXECUTABLE nvcc
+        PATHS /usr/local/cuda/bin /usr/local/cuda-12.8/bin
+        NO_DEFAULT_PATH
+    )
+
+    # Nothing there: fall back to the default search, which must succeed.
+    if(NOT NVCC_EXECUTABLE)
+        find_program(NVCC_EXECUTABLE nvcc REQUIRED)
+    endif()
+
+    message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}")
+    set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}")
+endif()
+
+# --- Android: fetch NDK-matched Vulkan C++ headers ---
+# The NDK ships vulkan/vulkan_core.h (C) but not vulkan/vulkan.hpp (C++).
+# detect_ndk_vulkan_version() sets vulkan_version from the NDK header, and the
+# Vulkan-Headers tag of that exact version is downloaded; only *.hpp is unpacked.
+# NOTE(review): the archive is fetched without a pinned hash — confirm acceptable.
+if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES)
+    include(${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake)
+    detect_ndk_vulkan_version()
+    message(STATUS "NDK Vulkan version: ${vulkan_version}")
+
+    file(DOWNLOAD
+        "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz"
+        "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz"
+        TLS_VERIFY ON
+    )
+    file(ARCHIVE_EXTRACT
+        INPUT "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz"
+        DESTINATION "${SOURCE_PATH}"
+        PATTERNS "*.hpp"
+    )
+    # ggml_add_backend_library adds target_include_directories(${backend} PRIVATE ..)
+    # which resolves to src/ for backends under src/ggml-vulkan/.  Copying include/
+    # into src/ yields src/vulkan/*.hpp, so #include <vulkan/vulkan.hpp> resolves.
+    file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/"
+         DESTINATION "${SOURCE_PATH}/src/")
+endif()
+
+# --- Platform options ---
+# Android uses a hybrid backend layout: with GGML_BACKEND_DL the GPU backends
+# (Vulkan, OpenCL) become MODULE libraries that are loaded at runtime, while
+# GGML_CPU_STATIC keeps the CPU backend a static library so its symbols can be
+# linked directly.  Other platforms add no extra options.
+unset(PLATFORM_OPTIONS)
+
+if(VCPKG_TARGET_IS_ANDROID)
+    set(PLATFORM_OPTIONS
+        -DGGML_BACKEND_DL=ON
+        -DGGML_CPU_STATIC=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT2=ON
+    )
+endif()
+
+# --- Configure & build ---
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}"
+    OPTIONS
+        -DBUILD_SHARED_LIBS=OFF  # passed unconditionally, on every platform
+        -DGGML_NATIVE=OFF
+        -DGGML_CCACHE=OFF
+        -DGGML_OPENMP=OFF
+        -DGGML_LLAMAFILE=OFF
+        -DGGML_BUILD_TESTS=OFF
+        -DGGML_BUILD_EXAMPLES=OFF
+        -DGGML_METAL=${GGML_METAL}  # the four backend switches mirror the requested vcpkg features
+        -DGGML_VULKAN=${GGML_VULKAN}
+        -DGGML_CUDA=${GGML_CUDA}
+        -DGGML_OPENCL=${GGML_OPENCL}
+        -DGGML_MAX_NAME=128  # stable-diffusion.cpp requires >= 128; turned into a compile definition by ggml-max-name.patch
+        ${GGML_CUDA_COMPILER_OPTION}  # empty unless the cuda feature is enabled
+        ${PLATFORM_OPTIONS}  # Android-only switches; expands to nothing elsewhere
+)
+
+vcpkg_cmake_install()
+
+# Android only: copy the backend MODULE libraries from the release build tree
+# into lib/, because ggml's own install() rules do not install them.
+if(VCPKG_TARGET_IS_ANDROID)
+    set(_rel_bin_dir "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin")
+    file(GLOB _backend_sos
+        "${_rel_bin_dir}/libqvac-diffusion-ggml-*.so"
+        "${_rel_bin_dir}/libggml-*.so")
+    if(_backend_sos)
+        file(INSTALL ${_backend_sos} DESTINATION "${CURRENT_PACKAGES_DIR}/lib")
+    endif()
+endif()
+
+# Fix up the CMake package config installed by ggml's own build system.
+vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml)
+
+# ggml installs ggml.pc under share/pkgconfig.  For the release and the debug
+# tree alike: move it to lib/pkgconfig when present, then drop share/pkgconfig,
+# and let vcpkg_fixup_pkgconfig() fix absolute paths for the post-build checks.
+foreach(_pkg_root IN ITEMS "${CURRENT_PACKAGES_DIR}" "${CURRENT_PACKAGES_DIR}/debug")
+    if(EXISTS "${_pkg_root}/share/pkgconfig/ggml.pc")
+        file(MAKE_DIRECTORY "${_pkg_root}/lib/pkgconfig")
+        file(RENAME "${_pkg_root}/share/pkgconfig/ggml.pc"
+                    "${_pkg_root}/lib/pkgconfig/ggml.pc")
+    endif()
+    file(REMOVE_RECURSE "${_pkg_root}/share/pkgconfig")
+endforeach()
+vcpkg_fixup_pkgconfig()
+
+# The debug tree keeps neither headers nor share/ content.
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include"
+                    "${CURRENT_PACKAGES_DIR}/debug/share")
+
+# DL backends are only built for release; debug build produces fewer binaries.
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled)
+
+# Ship the port's usage text under share/<port> and install the upstream
+# LICENSE file as the package copyright.
+file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}")
+vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/usage b/packages/lib-infer-diffusion/vcpkg/ports/ggml/usage
new file mode 100644
index 0000000000..9b23041f03
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/usage
@@ -0,0 +1,10 @@
+The package ggml provides CMake integration:
+
+  find_package(ggml CONFIG REQUIRED)
+  target_link_libraries(main PRIVATE ggml::ggml)
+
+Available vcpkg features:
+  metal  - Metal GPU backend (default on osx/ios)
+  vulkan - Vulkan GPU backend (default on windows/linux/android)
+  cuda   - CUDA GPU backend (opt-in, not a default feature)
+  opencl - OpenCL GPU backend (default on android)
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/ggml/vcpkg.json b/packages/lib-infer-diffusion/vcpkg/ports/ggml/vcpkg.json
new file mode 100644
index 0000000000..ac5e6a371c
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/ggml/vcpkg.json
@@ -0,0 +1,49 @@
+{
+  "name": "ggml",
+  "version-date": "2026-01-30",
+  "port-version": 1,
+  "description": "Tensor library for machine learning (ggml-org fork, pinned to commit used by stable-diffusion.cpp master-514-5792c66)",
+  "homepage": "https://github.com/ggml-org/ggml",
+  "license": "MIT",
+  "dependencies": [
+    {
+      "name": "vcpkg-cmake",
+      "host": true
+    },
+    {
+      "name": "vcpkg-cmake-config",
+      "host": true
+    }
+  ],
+  "default-features": [
+    {
+      "name": "metal",
+      "platform": "osx | ios"
+    },
+    {
+      "name": "opencl",
+      "platform": "android"
+    },
+    {
+      "name": "vulkan",
+      "platform": "windows | linux | android"
+    }
+  ],
+  "features": {
+    "cuda": {
+      "description": "Enable CUDA GPU backend"
+    },
+    "metal": {
+      "description": "Enable Metal GPU backend (macOS/iOS)"
+    },
+    "opencl": {
+      "description": "Enable OpenCL GPU backend",
+      "dependencies": [
+        "opencl"
+      ]
+    },
+    "vulkan": {
+      "description": "Enable Vulkan GPU backend"
+    }
+  }
+}
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/abort-callback.patch b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/abort-callback.patch
new file mode 100644
index 0000000000..e7b4a9c67e
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/abort-callback.patch
@@ -0,0 +1,73 @@
+diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h
+index 51b2b32..8da9adf 100644
+--- a/include/stable-diffusion.h
++++ b/include/stable-diffusion.h
+@@ -331,10 +331,12 @@ typedef struct sd_ctx_t sd_ctx_t;
+ 
+ typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
+ typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
++typedef bool (*sd_abort_cb_t)(void* data);
+ typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy, void* data);
+ 
+ SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
+ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
++SD_API void sd_set_abort_callback(sd_abort_cb_t cb, void* data);
+ SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
+ SD_API int32_t sd_get_num_physical_cores();
+ SD_API const char* sd_get_system_info();
+diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
+index d769d45..d51f039 100644
+--- a/src/stable-diffusion.cpp
++++ b/src/stable-diffusion.cpp
+@@ -2193,6 +2193,9 @@ public:
+                 int showstep = std::abs(step);
+                 pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep);
+                 // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000);
++                if (sd_abort_requested()) {
++                    return (ggml_tensor*)nullptr;
++                }
+             }
+             return denoised;
+         };
+diff --git a/src/util.cpp b/src/util.cpp
+index a94cfd9..343815d 100644
+--- a/src/util.cpp
++++ b/src/util.cpp
+@@ -270,6 +270,9 @@ int32_t sd_get_num_physical_cores() {
+ static sd_progress_cb_t sd_progress_cb = nullptr;
+ void* sd_progress_cb_data              = nullptr;
+ 
++static sd_abort_cb_t sd_abort_cb = nullptr;
++static void* sd_abort_cb_data   = nullptr;
++
+ static sd_preview_cb_t sd_preview_cb = nullptr;
+ static void* sd_preview_cb_data      = nullptr;
+ preview_t sd_preview_mode            = PREVIEW_NONE;
+@@ -423,6 +426,15 @@ void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
+     sd_progress_cb      = cb;
+     sd_progress_cb_data = data;
+ }
++
++void sd_set_abort_callback(sd_abort_cb_t cb, void* data) {
++    sd_abort_cb      = cb;
++    sd_abort_cb_data = data;
++}
++
++bool sd_abort_requested() {
++    return sd_abort_cb && sd_abort_cb(sd_abort_cb_data);
++}
+ void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode, int interval, bool denoised, bool noisy, void* data) {
+     sd_preview_cb       = cb;
+     sd_preview_cb_data  = data;
+diff --git a/src/util.h b/src/util.h
+index 7dee7bf..254041e 100644
+--- a/src/util.h
++++ b/src/util.h
+@@ -69,6 +69,7 @@ protected:
+ std::string path_join(const std::string& p1, const std::string& p2);
+ std::vector<std::string> split_string(const std::string& str, char delimiter);
+ void pretty_progress(int step, int steps, float time);
++bool sd_abort_requested();
+ 
+ void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
+ 
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/fix-failure-path-cleanup.patch b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/fix-failure-path-cleanup.patch
new file mode 100644
index 0000000000..cc662a05ba
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/fix-failure-path-cleanup.patch
@@ -0,0 +1,32 @@
+diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
+index d769d45..db3f242 100644
+--- a/src/stable-diffusion.cpp
++++ b/src/stable-diffusion.cpp
+@@ -2203,7 +2203,11 @@ public:
+                 control_net->free_control_ctx();
+                 control_net->free_compute_buffer();
+             }
+-            diffusion_model->free_compute_buffer();
++            // Upstream bug: this abort/failure path freed the compute buffer of
++            // diffusion_model rather than work_diffusion_model, the model whose
++            // buffer the success path further down frees.  Both exits must free
++            // the same one, otherwise sd_ctx state is corrupted and reuse segfaults.
++            work_diffusion_model->free_compute_buffer();
+             return NULL;
+         }
+ 
+@@ -3796,6 +3800,13 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
+ 
+     size_t t2 = ggml_time_ms();
+ 
++    // When generate_image_internal() returns NULL (abort or failure),
++    // work_ctx is never freed inside that function -- it only frees on
++    // the success path.  Free it here to avoid leaking the ggml context.
++    if (result_images == nullptr) {
++        ggml_free(work_ctx);
++    }
++
+     LOG_INFO("generate_image completed in %.2fs", (t2 - t0) * 1.0f / 1000);
+ 
+     return result_images;
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/portfile.cmake b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/portfile.cmake
new file mode 100644
index 0000000000..cc38e5eb20
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/portfile.cmake
@@ -0,0 +1,72 @@
+# stable-diffusion.cpp vcpkg overlay port
+#
+# Builds the stable-diffusion.cpp inference library as a static library that
+# links against the ggml package provided by the separate ggml overlay port.
+#
+# Installed artefacts:
+#   include/stable-diffusion.h   (main C API)
+#   lib/libstable-diffusion.a    (static library)
+#   share/stable-diffusion-cpp/  (CMake package config)
+#
+# GPU backend selection happens at runtime through ggml's backend registry.
+# The sd-generic-backend-init patch swaps SD's #ifdef-selected, backend-specific
+# init for ggml_backend_init_by_type(), which works with both statically
+# linked and dynamically loaded backends.
+#
+# Pinned to release tag master-514-5792c66 (2026-03-01).
+vcpkg_from_github(
+    OUT_SOURCE_PATH SOURCE_PATH
+    REPO tetherto/qvac-ext-stable-diffusion.cpp
+    REF 5792c668798083f9f6d57dac66fbc62ddfdac405
+    SHA512 9bdf945d27ea24d9ea8218a7b875b6d1346711122723453840f4648cd862de3be28e37736ce0ef46ed304cbe810593dfa4264eec969c9e0c8dafb854298280f7
+    HEAD_REF master
+    PATCHES
+        sd-generic-backend-init.patch
+        sd-android-vulkan-diagnostics.patch
+        abort-callback.patch
+        fix-failure-path-cleanup.patch
+)
+
+# Translate the "flash-attn" port feature into -DSD_FLASH_ATTN=ON/OFF.
+vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
+    FEATURES
+        flash-attn SD_FLASH_ATTN
+)
+
+# Only build Release — debug builds are not needed for the prebuild and can
+# fail with MSVC iterator-debug-level mismatches.
+set(VCPKG_BUILD_TYPE release)
+
+# --- Configure & build ---
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}"
+    DISABLE_PARALLEL_CONFIGURE
+    OPTIONS
+        -DSD_BUILD_EXAMPLES=OFF
+        -DSD_BUILD_SHARED_LIBS=OFF
+        -DSD_USE_SYSTEM_GGML=ON
+        ${FEATURE_OPTIONS}
+    MAYBE_UNUSED_VARIABLES
+        SD_FLASH_ATTN
+)
+
+vcpkg_cmake_install()
+
+# --- CMake package config ---
+# Upstream does not export a CMake config, so we ship our own that defines
+# stable-diffusion::stable-diffusion with ggml as a transitive dependency.
+file(INSTALL
+    "${CMAKE_CURRENT_LIST_DIR}/stable-diffusion-cppConfig.cmake"
+    "${CMAKE_CURRENT_LIST_DIR}/stable-diffusion-cppConfigVersion.cmake"
+    DESTINATION "${CURRENT_PACKAGES_DIR}/share/stable-diffusion-cpp"
+)
+
+# --- Cleanup (no debug/ tree is expected with the release-only build above) ---
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share")
+
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled)
+
+file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}")
+vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-android-vulkan-diagnostics.patch b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-android-vulkan-diagnostics.patch
new file mode 100644
index 0000000000..d641f9e6d1
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-android-vulkan-diagnostics.patch
@@ -0,0 +1,149 @@
+diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
+index 004ddabf..f7a97fc1 100644
+--- a/src/stable-diffusion.cpp
++++ b/src/stable-diffusion.cpp
+@@ -97,6 +97,34 @@ void suppress_pp(int step, int steps, float time, void* data) {
+     return;
+ }
+ 
++static const char* ggml_backend_device_type_name(enum ggml_backend_dev_type type) {
++    switch (type) {
++        case GGML_BACKEND_DEVICE_TYPE_CPU:
++            return "CPU";
++        case GGML_BACKEND_DEVICE_TYPE_GPU:
++            return "GPU";
++        case GGML_BACKEND_DEVICE_TYPE_IGPU:
++            return "IGPU";
++        case GGML_BACKEND_DEVICE_TYPE_ACCEL:
++            return "ACCEL";
++        default:
++            return "UNKNOWN";
++    }
++}
++
++static void ggml_sd_log_bridge(enum ggml_log_level level, const char* text, void* user_data) {
++    (void)user_data;
++    if (text == nullptr) {
++        return;
++    }
++    switch (level) {
++        case GGML_LOG_LEVEL_DEBUG: LOG_DEBUG("ggml: %s", text); break;
++        case GGML_LOG_LEVEL_WARN:  LOG_WARN("ggml: %s", text); break;
++        case GGML_LOG_LEVEL_ERROR: LOG_ERROR("ggml: %s", text); break;
++        default:                   LOG_INFO("ggml: %s", text); break;
++    }
++}
++
+ /*=============================================== StableDiffusionGGML ================================================*/
+ 
+ class StableDiffusionGGML {
+@@ -196,6 +223,9 @@ public:
+ 
+         if (preferred_backend == SD_BACKEND_PREF_OPENCL) {
+             const size_t n_devices = ggml_backend_dev_count();
++            bool found_opencl_device = false;
++            bool failed_opencl_init = false;
++            LOG_INFO("OpenCL preference: probing %zu device(s)", n_devices);
+             for (size_t i = 0; i < n_devices; ++i) {
+                 ggml_backend_dev_t dev = ggml_backend_dev_get(i);
+                 const enum ggml_backend_dev_type dev_type = ggml_backend_dev_type(dev);
+@@ -204,6 +234,9 @@ public:
+                     continue;
+                 }
+                 const char* name = ggml_backend_dev_name(dev);
++                const char* desc = ggml_backend_dev_description(dev);
++                LOG_INFO("OpenCL probe candidate[%zu]: name='%s' desc='%s' type=%s", i, name ? name : "<null>", desc ? desc : "<null>", ggml_backend_device_type_name(dev_type));
++
+                 if (!name) {
+                     continue;
+                 }
+@@ -212,18 +245,78 @@ public:
+                 if (!is_opencl) {
+                     continue;
+                 }
++
++                found_opencl_device = true;
+                 backend = ggml_backend_dev_init(dev, NULL);
+                 if (backend) {
+                     LOG_INFO("Using OpenCL backend '%s'", name);
+                     LOG_INFO("Backend initialized successfully (OpenCL preference)");
+                     return;
+                 }
++                failed_opencl_init = true;
++                LOG_WARN("OpenCL backend candidate '%s' failed to initialize", name);
++            }
++
++            if (!found_opencl_device) {
++                LOG_WARN("OpenCL preference requested but no OpenCL GPU device was enumerated; falling back to generic GPU selection");
++            } else if (failed_opencl_init) {
++                LOG_WARN("OpenCL preference requested but all OpenCL device init attempts failed; falling back to generic GPU selection");
++            } else {
++                LOG_WARN("OpenCL preference requested but no OpenCL backend could be initialized; falling back to generic GPU selection");
+             }
+-            LOG_WARN("OpenCL preference requested but no OpenCL backend could be initialized; falling back to generic GPU selection");
+         }
+ 
+-        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, NULL);
++        const size_t n_devices = ggml_backend_dev_count();
++        bool attempted_gpu_device_init = false;
++
++        // Pass 1: dedicated GPUs, in enumeration order; the first one that initializes is used.
++        for (size_t i = 0; i < n_devices; ++i) {
++            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
++            const enum ggml_backend_dev_type dev_type = ggml_backend_dev_type(dev);
++            if (dev_type != GGML_BACKEND_DEVICE_TYPE_GPU) {
++                continue;
++            }
++            const char* name = ggml_backend_dev_name(dev);
++            const char* desc = ggml_backend_dev_description(dev);
++            LOG_INFO("GPU init candidate[%zu]: name='%s' desc='%s' type=%s", i, name ? name : "<null>", desc ? desc : "<null>", ggml_backend_device_type_name(dev_type));
++            attempted_gpu_device_init = true;
++            backend = ggml_backend_dev_init(dev, NULL);
++            if (backend) {
++                LOG_INFO("Initialized GPU backend from explicit device candidate[%zu] '%s'", i, name ? name : "<null>");
++                break;
++            }
++            LOG_WARN("Failed to initialize GPU device candidate[%zu] '%s'", i, name ? name : "<null>");
++        }
++
++        // Pass 2: integrated GPUs, tried only when pass 1 left `backend` unset.
++        if (!backend) {
++            for (size_t i = 0; i < n_devices; ++i) {
++                ggml_backend_dev_t dev = ggml_backend_dev_get(i);
++                const enum ggml_backend_dev_type dev_type = ggml_backend_dev_type(dev);
++                if (dev_type != GGML_BACKEND_DEVICE_TYPE_IGPU) {
++                    continue;
++                }
++                const char* name = ggml_backend_dev_name(dev);
++                const char* desc = ggml_backend_dev_description(dev);
++                LOG_INFO("IGPU init candidate[%zu]: name='%s' desc='%s' type=%s", i, name ? name : "<null>", desc ? desc : "<null>", ggml_backend_device_type_name(dev_type));
++                attempted_gpu_device_init = true;
++                backend = ggml_backend_dev_init(dev, NULL);
++                if (backend) {
++                    LOG_INFO("Initialized IGPU backend from explicit device candidate[%zu] '%s'", i, name ? name : "<null>");
++                    break;
++                }
++                LOG_WARN("Failed to initialize IGPU device candidate[%zu] '%s'", i, name ? name : "<null>");
++            }
++        }
++
+         if (!backend) {
++            if (attempted_gpu_device_init) {
++                LOG_WARN("All explicit GPU device init attempts failed; trying generic GPU init by type");
++            }
++            backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, NULL);
++        }
++
++        if (!backend) {
+             LOG_WARN("GPU backend initialization failed; falling back to CPU backend");
+             backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
+             if (!backend) {
+@@ -267,7 +324,7 @@ public:
+             sampler_rng = rng;
+         }
+ 
+-        ggml_log_set(ggml_log_callback_default, nullptr);
++        ggml_log_set(ggml_sd_log_bridge, nullptr);
+ 
+         init_backend(sd_ctx_params->preferred_gpu_backend);
+ 
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-generic-backend-init.patch b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-generic-backend-init.patch
new file mode 100644
index 0000000000..a33e655d0e
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/sd-generic-backend-init.patch
@@ -0,0 +1,177 @@
+diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h
+index 51b2b329..7bfd6816 100644
+--- a/include/stable-diffusion.h
++++ b/include/stable-diffusion.h
+@@ -145,6 +145,13 @@ enum lora_apply_mode_t {
+     LORA_APPLY_MODE_COUNT,
+ };
+ 
++enum sd_backend_preference_t {
++    SD_BACKEND_PREF_AUTO = 0,
++    SD_BACKEND_PREF_CPU,
++    SD_BACKEND_PREF_GPU,
++    SD_BACKEND_PREF_OPENCL,
++};
++
+ typedef struct {
+     bool enabled;
+     int tile_size_x;
+@@ -201,6 +208,7 @@ typedef struct {
+     bool chroma_use_t5_mask;
+     int chroma_t5_mask_pad;
+     bool qwen_image_zero_cond_t;
++    enum sd_backend_preference_t preferred_gpu_backend;
+ } sd_ctx_params_t;
+ 
+ typedef struct {
+diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
+index d769d45c..004ddabf 100644
+--- a/src/stable-diffusion.cpp
++++ b/src/stable-diffusion.cpp
+@@ -164,60 +164,81 @@ public:
+         ggml_backend_free(backend);
+     }
+ 
+-    void init_backend() {
+-#ifdef SD_USE_CUDA
+-        LOG_DEBUG("Using CUDA backend");
+-        backend = ggml_backend_cuda_init(0);
+-#endif
+-#ifdef SD_USE_METAL
+-        LOG_DEBUG("Using Metal backend");
+-        backend = ggml_backend_metal_init();
+-#endif
+-#ifdef SD_USE_VULKAN
+-        LOG_DEBUG("Using Vulkan backend");
+-        size_t device          = 0;
+-        const int device_count = ggml_backend_vk_get_device_count();
+-        if (device_count) {
+-            const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
+-            if (SD_VK_DEVICE != nullptr) {
+-                std::string sd_vk_device_str = SD_VK_DEVICE;
+-                try {
+-                    device = std::stoull(sd_vk_device_str);
+-                } catch (const std::invalid_argument&) {
+-                    LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE);
+-                    device = 0;
+-                } catch (const std::out_of_range&) {
+-                    LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE);
+-                    device = 0;
++    void init_backend(enum sd_backend_preference_t preferred_backend) {
++        const char* pref_name = "auto";
++        if (preferred_backend == SD_BACKEND_PREF_CPU) {
++            pref_name = "cpu";
++        } else if (preferred_backend == SD_BACKEND_PREF_GPU) {
++            pref_name = "gpu";
++        } else if (preferred_backend == SD_BACKEND_PREF_OPENCL) {
++            pref_name = "opencl";
++        }
++        LOG_INFO("Backend preference requested: %s", pref_name);
++
++        if (getenv("SD_CPU_ONLY")) {
++            LOG_INFO("SD_CPU_ONLY set - using CPU backend");
++            backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
++            if (!backend) {
++                LOG_ERROR("SD_CPU_ONLY set but CPU backend failed to initialize");
++            }
++            return;
++        }
++
++        if (preferred_backend == SD_BACKEND_PREF_CPU) {
++            backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
++            if (backend) {
++                LOG_INFO("Initialized CPU backend from explicit preference");
++            } else {
++                LOG_WARN("CPU backend preference requested but CPU backend initialization failed");
++            }
++            return;
++        }
++
++        if (preferred_backend == SD_BACKEND_PREF_OPENCL) {
++            const size_t n_devices = ggml_backend_dev_count();
++            for (size_t i = 0; i < n_devices; ++i) {
++                ggml_backend_dev_t dev = ggml_backend_dev_get(i);
++                const enum ggml_backend_dev_type dev_type = ggml_backend_dev_type(dev);
++                if (dev_type != GGML_BACKEND_DEVICE_TYPE_GPU &&
++                    dev_type != GGML_BACKEND_DEVICE_TYPE_IGPU) {
++                    continue;
++                }
++                const char* name = ggml_backend_dev_name(dev);
++                if (!name) {
++                    continue;
+                 }
+-                if (device >= device_count) {
+-                    LOG_WARN("Cannot find targeted vulkan device (%llu). Falling back to device 0.", device);
+-                    device = 0;
++                const bool is_opencl = strstr(name, "opencl") != NULL ||
++                                       strstr(name, "OpenCL") != NULL;
++                if (!is_opencl) {
++                    continue;
++                }
++                backend = ggml_backend_dev_init(dev, NULL);
++                if (backend) {
++                    LOG_INFO("Using OpenCL backend '%s'", name);
++                    LOG_INFO("Backend initialized successfully (OpenCL preference)");
++                    return;
+                 }
+             }
+-            LOG_INFO("Vulkan: Using device %llu", device);
+-            backend = ggml_backend_vk_init(device);
+-        }
+-        if (!backend) {
+-            LOG_WARN("Failed to initialize Vulkan backend");
++            LOG_WARN("OpenCL preference requested but no OpenCL backend could be initialized; falling back to generic GPU selection");
+         }
+-#endif
+-#ifdef SD_USE_OPENCL
+-        LOG_DEBUG("Using OpenCL backend");
+-        // ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
+-        backend = ggml_backend_opencl_init();
++
++        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, NULL);
+         if (!backend) {
+-            LOG_WARN("Failed to initialize OpenCL backend");
++            LOG_WARN("GPU backend initialization failed; falling back to CPU backend");
++            backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
++            if (!backend) {
++                LOG_ERROR("CPU fallback backend initialization failed");
++            }
++        } else {
++            LOG_INFO("Initialized generic GPU backend");
+         }
+-#endif
+-#ifdef SD_USE_SYCL
+-        LOG_DEBUG("Using SYCL backend");
+-        backend = ggml_backend_sycl_init(0);
+-#endif
+ 
+-        if (!backend) {
+-            LOG_DEBUG("Using CPU backend");
+-            backend = ggml_backend_cpu_init();
++        if (backend) {
++            if (ggml_backend_is_cpu(backend)) {
++                LOG_INFO("Final backend type: CPU");
++            } else {
++                LOG_INFO("Final backend type: non-CPU");
++            }
+         }
+     }
+ 
+@@ -248,7 +269,7 @@ public:
+ 
+         ggml_log_set(ggml_log_callback_default, nullptr);
+ 
+-        init_backend();
++        init_backend(sd_ctx_params->preferred_gpu_backend);
+ 
+         ModelLoader model_loader;
+ 
+@@ -2953,6 +2974,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
+     sd_ctx_params->chroma_use_dit_mask     = true;
+     sd_ctx_params->chroma_use_t5_mask      = false;
+     sd_ctx_params->chroma_t5_mask_pad      = 1;
++    sd_ctx_params->preferred_gpu_backend = SD_BACKEND_PREF_AUTO;
+ }
+ 
+ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfig.cmake b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfig.cmake
new file mode 100644
index 0000000000..97ea86273f
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfig.cmake
@@ -0,0 +1,30 @@
+# Imported-target definition for the stable-diffusion-cpp overlay port.
+include(CMakeFindDependencyMacro)
+find_dependency(ggml CONFIG)
+
+# This file is installed two directory levels below the package prefix.
+get_filename_component(_SD_CPP_PREFIX "${CMAKE_CURRENT_LIST_DIR}" DIRECTORY)
+get_filename_component(_SD_CPP_PREFIX "${_SD_CPP_PREFIX}" DIRECTORY)
+
+# Look only inside this prefix; default search locations are disabled.
+find_library(STABLE_DIFFUSION_LIBRARY
+    NAMES stable-diffusion
+    PATHS "${_SD_CPP_PREFIX}/lib"
+    NO_DEFAULT_PATH REQUIRED
+)
+
+find_path(STABLE_DIFFUSION_INCLUDE_DIR
+    NAMES stable-diffusion.h
+    PATHS "${_SD_CPP_PREFIX}/include"
+    NO_DEFAULT_PATH REQUIRED
+)
+
+if(NOT TARGET stable-diffusion::stable-diffusion)
+    add_library(stable-diffusion::stable-diffusion STATIC IMPORTED)
+    set_property(TARGET stable-diffusion::stable-diffusion PROPERTY IMPORTED_LOCATION "${STABLE_DIFFUSION_LIBRARY}")
+    set_property(TARGET stable-diffusion::stable-diffusion PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${STABLE_DIFFUSION_INCLUDE_DIR}")
+    set_property(TARGET stable-diffusion::stable-diffusion PROPERTY INTERFACE_LINK_LIBRARIES "ggml::ggml")
+endif()
+
+unset(_SD_CPP_PREFIX)
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfigVersion.cmake b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfigVersion.cmake
new file mode 100644
index 0000000000..971b364efd
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/stable-diffusion-cppConfigVersion.cmake
@@ -0,0 +1,10 @@
+# Version check for find_package(): a request is treated as compatible unless
+# it asks for a version greater than PACKAGE_VERSION.
+set(PACKAGE_VERSION "0.0.1")
+set(PACKAGE_VERSION_COMPATIBLE TRUE)
+if(PACKAGE_FIND_VERSION VERSION_GREATER PACKAGE_VERSION)
+    set(PACKAGE_VERSION_COMPATIBLE FALSE)
+elseif(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
+    set(PACKAGE_VERSION_EXACT TRUE)
+endif()
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/usage b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/usage
new file mode 100644
index 0000000000..0b20cbf81e
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/usage
@@ -0,0 +1,17 @@
+The package stable-diffusion-cpp provides CMake integration:
+
+  find_package(stable-diffusion-cpp CONFIG REQUIRED)
+  target_link_libraries(main PRIVATE stable-diffusion::stable-diffusion)
+
+The following header is available:
+  #include <stable-diffusion.h>   // main C API
+
+For image I/O (stb_image, stb_image_write), depend on the stb vcpkg package
+directly rather than relying on headers bundled in this port.
+
+GPU backends are selected with this port's features, each of which enables the matching ggml feature:
+  metal  - Metal GPU backend (macOS/iOS, auto-enabled on Apple)
+  vulkan - Vulkan GPU backend
+  cuda   - CUDA GPU backend
+  opencl - OpenCL GPU backend (Android/Adreno)
+
diff --git a/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/vcpkg.json b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/vcpkg.json
new file mode 100644
index 0000000000..1ea8040713
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/ports/stable-diffusion-cpp/vcpkg.json
@@ -0,0 +1,84 @@
+{
+  "name": "stable-diffusion-cpp",
+  "version-date": "2026-03-01",
+  "description": "Diffusion model inference in pure C/C++ (SD1.x, SD2.x, SDXL, SD3, FLUX, Wan, ...)",
+  "homepage": "https://github.com/tetherto/qvac-ext-stable-diffusion.cpp",
+  "license": "MIT",
+  "dependencies": [
+    {
+      "name": "ggml",
+      "version>=": "2026-01-30"
+    },
+    {
+      "name": "vcpkg-cmake",
+      "host": true
+    },
+    {
+      "name": "vcpkg-cmake-config",
+      "host": true
+    }
+  ],
+  "default-features": [
+    {
+      "name": "metal",
+      "platform": "osx | ios"
+    },
+    {
+      "name": "opencl",
+      "platform": "android"
+    },
+    {
+      "name": "vulkan",
+      "platform": "windows | linux | android"
+    }
+  ],
+  "features": {
+    "cuda": {
+      "description": "Enable CUDA GPU acceleration",
+      "dependencies": [
+        {
+          "name": "ggml",
+          "features": [
+            "cuda"
+          ]
+        }
+      ]
+    },
+    "flash-attn": {
+      "description": "Enable Flash Attention for memory efficiency"
+    },
+    "metal": {
+      "description": "Enable Metal GPU acceleration (macOS/iOS)",
+      "dependencies": [
+        {
+          "name": "ggml",
+          "features": [
+            "metal"
+          ]
+        }
+      ]
+    },
+    "opencl": {
+      "description": "Enable OpenCL GPU acceleration (Android/Adreno)",
+      "dependencies": [
+        {
+          "name": "ggml",
+          "features": [
+            "opencl"
+          ]
+        }
+      ]
+    },
+    "vulkan": {
+      "description": "Enable Vulkan GPU acceleration",
+      "dependencies": [
+        {
+          "name": "ggml",
+          "features": [
+            "vulkan"
+          ]
+        }
+      ]
+    }
+  }
+}
diff --git a/packages/lib-infer-diffusion/vcpkg/toolchains/linux-clang.cmake b/packages/lib-infer-diffusion/vcpkg/toolchains/linux-clang.cmake
new file mode 100644
index 0000000000..542aa9dba1
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/toolchains/linux-clang.cmake
@@ -0,0 +1,4 @@
+set(CMAKE_C_COMPILER "clang-19")
+set(CMAKE_CXX_COMPILER "clang++-19")
+# Compilers pinned above; the rest comes from vcpkg's Linux toolchain, located via the VCPKG_ROOT environment variable.
+include("$ENV{VCPKG_ROOT}/scripts/toolchains/linux.cmake")
diff --git a/packages/lib-infer-diffusion/vcpkg/triplets/arm64-linux.cmake b/packages/lib-infer-diffusion/vcpkg/triplets/arm64-linux.cmake
new file mode 100644
index 0000000000..77c0e6b318
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/triplets/arm64-linux.cmake
@@ -0,0 +1,9 @@
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+# Chainload the clang toolchain; compile position-independent code against libc++.
+set(VCPKG_CHAINLOAD_TOOLCHAIN_FILE "${CMAKE_CURRENT_LIST_DIR}/../toolchains/linux-clang.cmake")
+set(VCPKG_C_FLAGS "-fPIC")
+set(VCPKG_CXX_FLAGS "-fPIC -stdlib=libc++")
+set(VCPKG_LINKER_FLAGS "-stdlib=libc++")
diff --git a/packages/lib-infer-diffusion/vcpkg/triplets/x64-linux.cmake b/packages/lib-infer-diffusion/vcpkg/triplets/x64-linux.cmake
new file mode 100644
index 0000000000..7660720b49
--- /dev/null
+++ b/packages/lib-infer-diffusion/vcpkg/triplets/x64-linux.cmake
@@ -0,0 +1,9 @@
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+# Chainload the clang toolchain; compile position-independent code against libc++.
+set(VCPKG_CHAINLOAD_TOOLCHAIN_FILE "${CMAKE_CURRENT_LIST_DIR}/../toolchains/linux-clang.cmake")
+set(VCPKG_C_FLAGS "-fPIC")
+set(VCPKG_CXX_FLAGS "-fPIC -stdlib=libc++")
+set(VCPKG_LINKER_FLAGS "-stdlib=libc++")