diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/.gitignore b/packages/qvac-lib-infer-stable-diffusion-cpp/.gitignore
new file mode 100644
index 0000000000..870b5270ee
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/.gitignore
@@ -0,0 +1,22 @@
+.vs/
+build/
+dist/
+models/
+store/
+node_modules/
+prebuilds/
+
+.npmrc
+package-lock.json
+.cache/
+.idea/
+**/store/
+.DS_Store
+logs/
+*.gguf
+*.safetensors
+*.ckpt
+*.log
+.clang-tidy
+# Added by qvac-lint-cpp
+.clang-format
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/CMakeLists.txt b/packages/qvac-lib-infer-stable-diffusion-cpp/CMakeLists.txt
new file mode 100644
index 0000000000..af330ac50b
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/CMakeLists.txt
@@ -0,0 +1,119 @@
+cmake_minimum_required(VERSION 3.25)
+
+set(ANDROID_STL "c++_shared" CACHE STRING "Android STL linkage") # option() is BOOL-only, so it cannot carry this default
+option(BUILD_TESTING "Build tests" OFF)
+if(BUILD_TESTING)
+  list(APPEND VCPKG_MANIFEST_FEATURES "tests")
+endif()
+
+find_package(cmake-bare REQUIRED PATHS node_modules/cmake-bare)
+find_package(cmake-vcpkg REQUIRED PATHS node_modules/cmake-vcpkg)
+
+project(qvac-lib-inference-addon-sd C CXX)
+
+find_path(VCPKG_INSTALLED_PATH share/qvac-lint-cpp/.clang-format REQUIRED)
+configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-format
+               ${CMAKE_CURRENT_SOURCE_DIR}/.clang-format COPYONLY)
+configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-tidy
+               ${CMAKE_CURRENT_SOURCE_DIR}/.clang-tidy COPYONLY)
+
+find_path(PICOJSON_INCLUDE_DIRS "picojson/picojson.h")
+find_path(QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS "qvac-lib-inference-addon-cpp/JsInterface.hpp")
+
+# stable-diffusion.cpp – uses the CMake config installed by the overlay port
+find_package(stable-diffusion-cpp CONFIG REQUIRED)
+
+# stb headers are installed into the same include dir by the port
+find_path(STB_IMAGE_WRITE_INCLUDE_DIR "stb_image_write.h" REQUIRED)
+
+if(WIN32)
+  add_compile_definitions(NOMINMAX WIN32_LEAN_AND_MEAN NOGDI) # the Windows SDK macro is LEAN_AND_MEAN
+endif()
+
+bare_target(bare_target_value)
+bare_module_target("." unused_target NAME module_name VERSION unused_version)
+set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}")
+message("Building qvac-lib-inference-addon-sd with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'")
+
+# On Linux/Android, install ggml dynamic backend libraries alongside the addon
+set(BACKEND_DL_LIBS "")
+if((ANDROID OR UNIX) AND NOT APPLE)
+  # ggml backends are built inside stable-diffusion.cpp port
+  foreach(_backend ${GGML_AVAILABLE_BACKENDS})
+    list(APPEND BACKEND_DL_LIBS INSTALL TARGET ggml::${_backend})
+  endforeach()
+endif()
+
+add_bare_module(qvac-lib-inference-addon-sd EXPORTS ${BACKEND_DL_LIBS})
+  set(ADDON_SOURCES
+    ${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp
+    ${PROJECT_SOURCE_DIR}/addon/src/model-interface/SdModel.cpp
+    ${PROJECT_SOURCE_DIR}/addon/src/utils/LoggingMacros.cpp
+    ${PROJECT_SOURCE_DIR}/addon/src/utils/BackendSelection.cpp
+  )
+
+  target_sources(
+    ${qvac-lib-inference-addon-sd}
+    PRIVATE
+      ${ADDON_SOURCES}
+  )
+
+  target_include_directories(
+    ${qvac-lib-inference-addon-sd}
+    PRIVATE
+      ${PICOJSON_INCLUDE_DIRS}
+      ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+      ${STB_IMAGE_WRITE_INCLUDE_DIR}
+      ${PROJECT_SOURCE_DIR}/addon/src
+  )
+
+  target_link_libraries(
+    ${qvac-lib-inference-addon-sd}
+    PRIVATE
+      stable-diffusion::stable-diffusion
+  )
+
+  # Link Metal framework on Apple platforms
+  if(APPLE)
+    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+    find_library(METAL_LIBRARY Metal REQUIRED)
+    find_library(METALKIT_LIBRARY MetalKit REQUIRED)
+    find_library(METALPERFORMANCESHADERS_LIBRARY MetalPerformanceShaders)
+    target_link_libraries(
+      ${qvac-lib-inference-addon-sd}
+      PRIVATE
+        ${FOUNDATION_LIBRARY}
+        ${METAL_LIBRARY}
+        ${METALKIT_LIBRARY}
+    )
+    if(METALPERFORMANCESHADERS_LIBRARY)
+      target_link_libraries(${qvac-lib-inference-addon-sd} PRIVATE ${METALPERFORMANCESHADERS_LIBRARY})
+    endif()
+  endif()
+
+  # Link OpenCL on Android
+  if(ANDROID)
+    find_package(opencl CONFIG)
+    if(opencl_FOUND)
+      target_link_libraries(${qvac-lib-inference-addon-sd} PRIVATE opencl)
+    endif()
+  endif()
+
+target_compile_features(${qvac-lib-inference-addon-sd} PRIVATE cxx_std_20)
+target_compile_definitions(${qvac-lib-inference-addon-sd} PUBLIC JS_LOGGER)
+target_compile_definitions(${qvac-lib-inference-addon-sd} PRIVATE BACKENDS_SUBDIR="${BACKENDS_SUBDIR_VALUE}")
+
+if(BUILD_TESTING)
+  find_package(GTest CONFIG REQUIRED)
+  include(GoogleTest)
+  enable_testing()
+  add_subdirectory(test/unit)
+endif()
+
+if(WIN32)
+  target_link_libraries(
+    ${qvac-lib-inference-addon-sd}
+    PRIVATE
+      msvcrt.lib
+  )
+endif()
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon.js b/packages/qvac-lib-infer-stable-diffusion-cpp/addon.js
new file mode 100644
index 0000000000..a46def5546
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon.js
@@ -0,0 +1,70 @@
+'use strict'
+
+const path = require('bare-path')
+
+/**
+ * JavaScript wrapper around the native stable-diffusion.cpp addon.
+ * Manages the native handle lifecycle and bridges JS ↔ C++.
+ */
+class SdInterface {
+  /**
+   * Creates the native instance. Missing `config` / `config.backendsDir` are filled in on the passed object.
+   * @param {object} binding - The native addon binding (from require.addon())
+   * @param {object} configurationParams - Configuration for the SD context
+   * @param {string} configurationParams.path - Local file path to the model weights
+   * @param {object} [configurationParams.config] - SD-specific configuration options
+   * @param {Function} outputCb - Called on any generation event (started, progress, output, error)
+   */
+  constructor (binding, configurationParams, outputCb) {
+    this._binding = binding
+
+    if (!configurationParams.config) configurationParams.config = {}
+    if (!configurationParams.config.backendsDir) {
+      configurationParams.config.backendsDir = path.join(__dirname, 'prebuilds')
+    }
+
+    this._handle = this._binding.createInstance(
+      this,
+      configurationParams,
+      outputCb
+    )
+  }
+
+  /**
+   * Moves addon to the LISTENING state after initialization. Rejects if already unloaded.
+   */
+  async activate () {
+    if (!this._handle) throw new Error('SdInterface: instance has been unloaded')
+    this._binding.activate(this._handle)
+  }
+
+  /**
+   * Cancel the current generation job. No-op once the instance has been unloaded.
+   */
+  async cancel () {
+    if (!this._handle) return
+    await this._binding.cancel(this._handle)
+  }
+
+  /**
+   * Run a generation job with the given parameters. Rejects if already unloaded.
+   * @param {object} params - Generation parameters (will be JSON-serialized)
+   * @returns {Promise<boolean>} true if job was accepted, false if busy
+   */
+  async runJob (params) {
+    // Fail in JS with a clear message instead of handing a null handle to the native binding.
+    if (!this._handle) throw new Error('SdInterface: instance has been unloaded')
+    return this._binding.runJob(this._handle, [{ type: 'text', input: JSON.stringify(params) }])
+  }
+
+  /**
+   * Unload the model and release all native resources. Safe to call more than once.
+   */
+  async unload () {
+    if (!this._handle) return
+    this._binding.destroyInstance(this._handle)
+    this._handle = null
+  }
+}
+
+module.exports = { SdInterface }
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/addon/AddonJs.hpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/addon/AddonJs.hpp
new file mode 100644
index 0000000000..e460aef3d0
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/addon/AddonJs.hpp
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <qvac-lib-inference-addon-cpp/JsInterface.hpp>
+#include <qvac-lib-inference-addon-cpp/JsUtils.hpp>
+#include <qvac-lib-inference-addon-cpp/ModelInterfaces.hpp>
+#include <qvac-lib-inference-addon-cpp/addon/AddonJs.hpp>
+#include <qvac-lib-inference-addon-cpp/handlers/JsOutputHandlerImplementations.hpp>
+#include <qvac-lib-inference-addon-cpp/handlers/OutputHandler.hpp>
+#include <qvac-lib-inference-addon-cpp/queue/OutputCallbackJs.hpp>
+
+#include "model-interface/SdModel.hpp"
+
+namespace qvac_lib_inference_addon_sd {
+
+// JS entry point. Arguments: (jsHandle, configurationParams, outputCallback).
+inline js_value_t* createInstance(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+  JsArgsParser args(env, info);
+
+  // Model file locations and the free-form "config" submap are all read from args[1].
+  const std::string modelPath = args.getMapEntry(1, "path");
+  const std::string clipLPath = args.getMapEntry(1, "clipLPath");
+  const std::string clipGPath = args.getMapEntry(1, "clipGPath");
+  const std::string t5XxlPath = args.getMapEntry(1, "t5XxlPath");
+  const std::string vaePath = args.getMapEntry(1, "vaePath");
+  auto sdConfig = args.getSubmap(1, "config");
+
+  auto sdModel = std::make_unique<SdModel>(
+      modelPath, clipLPath, clipGPath, t5XxlPath, vaePath, std::move(sdConfig));
+
+  // Two payload kinds are sent back to JS: progress strings and image byte arrays.
+  out_handl::OutputHandlers<out_handl::JsOutputHandlerInterface> handlers;
+  handlers.add(std::make_shared<out_handl::JsStringOutputHandler>());
+  handlers.add(std::make_shared<out_handl::JsTypedArrayOutputHandler<uint8_t>>());
+
+  std::unique_ptr<OutputCallBackInterface> jsCallback =
+      std::make_unique<OutputCallBackJs>(
+          env,
+          args.get(0, "jsHandle"),
+          args.getFunction(2, "outputCallback"),
+          std::move(handlers));
+
+  return JsInterface::createInstance(
+      env,
+      std::make_unique<AddonJs>(env, std::move(jsCallback), std::move(sdModel)));
+}
+JSCATCH
+
+// JS entry point. args[0] is the instance handle; the input must be a single "text"
+// item whose payload is the JSON-encoded generation parameters.
+inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+
+  JsArgsParser args(env, info);
+  AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance"));
+
+  auto [inputType, jsInput] = JsInterface::getInput(args);
+  if (inputType != "text") {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "stable-diffusion runJob expects a single text input with JSON params");
+  }
+
+  SdModel::GenerationJob job;
+  job.paramsJson = js::String(env, jsInput).as<std::string>(env);
+
+  // Both callbacks capture the addon instance by reference, so the job must not outlive it.
+
+  // Step-progress updates travel as JSON strings (JsStringOutputHandler).
+  job.progressCallback = [&instance](const std::string& progressJson) {
+    instance.addonCpp->outputQueue->queueResult(std::any(progressJson));
+  };
+
+  // Finished images travel as raw bytes (JsTypedArrayOutputHandler<uint8_t>).
+  job.outputCallback = [&instance](const std::vector<uint8_t>& imageBytes) {
+    instance.addonCpp->outputQueue->queueResult(std::any(imageBytes));
+  };
+
+  // Hand the job to the addon; whatever it returns goes straight back to JS.
+  return instance.runJob(std::any(std::move(job)));
+}
+JSCATCH
+
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/js-interface/binding.cpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/js-interface/binding.cpp
new file mode 100644
index 0000000000..9d9fb65fe0
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/js-interface/binding.cpp
@@ -0,0 +1,33 @@
+#include <bare.h>
+
+#include "../addon/AddonJs.hpp"
+
+js_value_t*
+qvacLibInferenceAddonSdExports(js_env_t* env, js_value_t* exports) {
+  // Attaches each native function to `exports` under its JS name; returns nullptr if any step fails.
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define V(name, fn)                                                            \
+  {                                                                            \
+    js_value_t* val;                                                           \
+    if (js_create_function(env, name, -1, fn, nullptr, &val) != 0) {          \
+      return nullptr;                                                          \
+    }                                                                          \
+    if (js_set_named_property(env, exports, name, val) != 0) {                \
+      return nullptr;                                                          \
+    }                                                                          \
+  }
+  // Addon-specific entry points implemented in ../addon/AddonJs.hpp.
+  V("createInstance", qvac_lib_inference_addon_sd::createInstance)
+  V("runJob", qvac_lib_inference_addon_sd::runJob)
+  // Lifecycle and logging entry points taken from qvac_lib_inference_addon_cpp::JsInterface.
+  V("activate", qvac_lib_inference_addon_cpp::JsInterface::activate)
+  V("cancel", qvac_lib_inference_addon_cpp::JsInterface::cancel)
+  V("destroyInstance", qvac_lib_inference_addon_cpp::JsInterface::destroyInstance)
+  V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger)
+  V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger)
+
+#undef V
+  return exports;
+}
+
+BARE_MODULE(qvac_lib_inference_addon_sd, qvacLibInferenceAddonSdExports)
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.cpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.cpp
new file mode 100644
index 0000000000..760963b2a5
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.cpp
@@ -0,0 +1,464 @@
+#include "SdModel.hpp"
+
+#include <algorithm>
+#include <chrono>
+#include <cstring>
+#include <sstream>
+#include <stdexcept>
+
+#define STB_IMAGE_IMPLEMENTATION
+#include <stb_image.h>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.h>
+
+#include <picojson/picojson.h>
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+#include <qvac-lib-inference-addon-cpp/Logger.hpp>
+
+#include "utils/LoggingMacros.hpp"
+
+using namespace qvac_lib_inference_addon_cpp;
+using qvac_errors::general_error;
+using qvac_errors::StatusError;
+
+// ---------------------------------------------------------------------------
+// Thread-local generation context used by the stable-diffusion.cpp progress
+// callback (which is a C function pointer with a void* userdata).
+// ---------------------------------------------------------------------------
+namespace {
+
+// Per-thread record of the job currently being generated and when it started.
+struct ProgressCtx {
+  const SdModel::GenerationJob* job = nullptr;
+  std::chrono::steady_clock::time_point startTime;
+};
+
+thread_local ProgressCtx tl_progressCtx;
+
+// Step hook: forwards {"step":N,"total":M,"elapsed_ms":T} to the active job's progressCallback.
+void sdProgressCallback(int step, int steps, float /*time*/, void* /*data*/) {
+  const SdModel::GenerationJob* activeJob = tl_progressCtx.job;
+  if (activeJob == nullptr || !activeJob->progressCallback) return;
+
+  const auto sinceStart = std::chrono::steady_clock::now() - tl_progressCtx.startTime;
+  const auto elapsedMs =
+      std::chrono::duration_cast<std::chrono::milliseconds>(sinceStart).count();
+  std::ostringstream json;
+  json << R"({"step":)" << step << R"(,"total":)" << steps
+       << R"(,"elapsed_ms":)" << elapsedMs << "}";
+  activeJob->progressCallback(json.str());
+}
+
+} // namespace
+
+// ---------------------------------------------------------------------------
+// Construction / destruction
+// ---------------------------------------------------------------------------
+
+SdModel::SdModel(
+    std::string modelPath,
+    std::string clipLPath,
+    std::string clipGPath,
+    std::string t5XxlPath,
+    std::string vaePath,
+    std::unordered_map<std::string, std::string> configMap)
+    : modelPath_(std::move(modelPath)),
+      clipLPath_(std::move(clipLPath)),
+      clipGPath_(std::move(clipGPath)),
+      t5XxlPath_(std::move(t5XxlPath)),
+      vaePath_(std::move(vaePath)),
+      sdCtx_(nullptr, &free_sd_ctx) {
+
+  // --- Parse configMap (every key is optional; absent keys keep the member defaults) ---
+  auto lookup = [&configMap](const char* key) -> const std::string* {
+    const auto it = configMap.find(key);
+    return it == configMap.end() ? nullptr : &it->second;
+  };
+  // Flags are true only for "1" or "true"; any other present value turns them off.
+  auto readFlag = [&lookup](const char* key, bool& target) {
+    if (const std::string* value = lookup(key)) target = (*value == "1" || *value == "true");
+  };
+  if (const std::string* value = lookup("threads")) {
+    // std::stoi throws std::invalid_argument/std::out_of_range; rethrow as a StatusError naming the key.
+    try {
+      nThreads_ = std::stoi(*value);
+    } catch (const std::exception&) {
+      throw StatusError(
+          general_error::InvalidArgument, "Invalid threads value '" + *value + "'. Must be an integer.");
+    }
+  }
+  readFlag("clip_on_cpu", clipOnCpu_);
+  readFlag("vae_on_cpu", vaeOnCpu_);
+  readFlag("vae_tiling", vaeTiling_);
+  readFlag("flash_attn", flashAttn_);
+  if (const std::string* value = lookup("wtype")) wtype_ = parseWeightType(*value);
+  if (const std::string* value = lookup("schedule")) schedule_ = parseSchedule(*value);
+  if (const std::string* value = lookup("rng")) rngType_ = (*value == "cpu") ? STD_DEFAULT_RNG : CUDA_RNG;
+
+  // Set log callback before creating the context
+  sd_set_log_callback(SdModel::sdLogCallback, nullptr);
+
+  sd_ctx_t* raw = new_sd_ctx(
+      modelPath_.c_str(),
+      clipLPath_.empty()  ? nullptr : clipLPath_.c_str(),
+      clipGPath_.empty()  ? nullptr : clipGPath_.c_str(),
+      t5XxlPath_.empty()  ? nullptr : t5XxlPath_.c_str(),
+      nullptr,            // diffusion_model_path (split models – not yet supported)
+      vaePath_.empty()    ? nullptr : vaePath_.c_str(),
+      nullptr,            // taesd_path
+      nullptr,            // controlnet_path
+      nullptr,            // lora_model_dir
+      nullptr,            // embed_dir
+      nullptr,            // stacked_id_embed_dir
+      /*vae_decode_only=*/false,
+      vaeTiling_,
+      /*free_params_immediately=*/true,
+      nThreads_,
+      wtype_,
+      rngType_,
+      schedule_,
+      clipOnCpu_,
+      /*control_net_cpu=*/false,
+      vaeOnCpu_,
+      flashAttn_);
+
+  if (!raw) {
+    throw StatusError(
+        general_error::InternalError,
+        "Failed to create stable-diffusion context. Check model path and format.");
+  }
+
+  sdCtx_.reset(raw);
+}
+
+SdModel::~SdModel() = default;
+
+// ---------------------------------------------------------------------------
+// IModel::process
+// ---------------------------------------------------------------------------
+
+std::any SdModel::process(const std::any& input) {
+  const auto& job = std::any_cast<const GenerationJob&>(input);
+  cancelRequested_.store(false);
+  // Expose this job to sdProgressCallback for the duration of the call. The guard clears the
+  // pointer on every exit path, so an exception below cannot leave it dangling.
+  struct ProgressScope {
+    explicit ProgressScope(const GenerationJob& active) {
+      tl_progressCtx = ProgressCtx{&active, std::chrono::steady_clock::now()};
+    }
+    ~ProgressScope() { tl_progressCtx.job = nullptr; }
+  } progressScope(job);
+  sd_set_progress_callback(sdProgressCallback, nullptr);
+
+  // --- Parse JSON params ---
+  picojson::value v;
+  const std::string parseErr = picojson::parse(v, job.paramsJson);
+  if (!parseErr.empty()) {
+    throw StatusError(
+        general_error::InvalidArgument, "Failed to parse generation params JSON: " + parseErr);
+  }
+  if (!v.is<picojson::object>()) {
+    throw StatusError(general_error::InvalidArgument, "Generation params must be a JSON object");
+  }
+  const auto& obj = v.get<picojson::object>();
+
+  auto getStr = [&](const std::string& key, const std::string& def = "") -> std::string {
+    auto it = obj.find(key);
+    if (it == obj.end() || !it->second.is<std::string>()) return def;
+    return it->second.get<std::string>();
+  };
+  // Number stored under key if it lies in [lo, hi]; def when the key is missing, not a number,
+  // NaN or out of range. The bounds keep the narrowing casts below well defined.
+  auto getNum = [&](const std::string& key, double lo, double hi, double def) -> double {
+    auto it = obj.find(key);
+    if (it == obj.end() || !it->second.is<double>()) return def;
+    const double d = it->second.get<double>();
+    return (d >= lo && d <= hi) ? d : def;
+  };
+  auto getInt = [&](const std::string& key, int def) -> int {  // bounds assume a 32-bit int
+    return static_cast<int>(getNum(key, -2147483648.0, 2147483647.0, def));
+  };
+  auto getFloat = [&](const std::string& key, float def) -> float {
+    return static_cast<float>(getNum(key, -3.4e38, 3.4e38, def));
+  };
+
+  const std::string mode           = getStr("mode", "txt2img");
+  const std::string prompt         = getStr("prompt");
+  const std::string negativePrompt = getStr("negative_prompt");
+  const int width                  = getInt("width", 512);
+  const int height                 = getInt("height", 512);
+  const int steps                  = getInt("steps", 20);
+  const float cfgScale             = getFloat("cfg_scale", 7.0f);
+  // Read the seed at 64-bit width; going through getInt() would lose seeds above INT_MAX.
+  const int64_t seed               = static_cast<int64_t>(getNum("seed", -9.0e18, 9.0e18, -1.0));
+  const int batchCount             = getInt("batch_count", 1);
+  const sample_method_t sampler    = parseSampler(getStr("sampler", "euler_a"));
+  if (width <= 0 || height <= 0 || steps <= 0 || batchCount <= 0) {
+    throw StatusError(
+        general_error::InvalidArgument, "width, height, steps and batch_count must be positive");
+  }
+  auto t0 = std::chrono::steady_clock::now();
+  bool success = false;
+  if (mode == "txt2img") {
+    success = runTxt2Img(
+        prompt, negativePrompt, width, height, steps,
+        cfgScale, sampler, seed, batchCount, job);
+  } else if (mode == "img2img") {
+    const float strength = getFloat("strength", 0.75f);
+    // For now the init image arrives as init_image_bytes: a JSON array of byte values (0-255).
+    std::vector<uint8_t> initPng;
+    if (auto it = obj.find("init_image_bytes"); it != obj.end() && it->second.is<picojson::array>()) {
+      const auto& arr = it->second.get<picojson::array>();
+      initPng.reserve(arr.size());
+      for (const auto& el : arr) {
+        const double value = el.is<double>() ? el.get<double>() : -1.0;
+        if (!(value >= 0.0 && value <= 255.0)) {
+          throw StatusError(
+              general_error::InvalidArgument, "init_image_bytes must hold numbers in 0-255");
+        }
+        initPng.push_back(static_cast<uint8_t>(value));
+      }
+    }
+    success = runImg2Img(
+        prompt, negativePrompt, initPng, width, height, steps,
+        cfgScale, strength, sampler, seed, batchCount, job);
+  } else {
+    throw StatusError(
+        general_error::InvalidArgument, "Unknown generation mode: " + mode + ". Supported: txt2img, img2img");
+  }
+  auto t1 = std::chrono::steady_clock::now();
+  const double generationTimeMs = std::chrono::duration<double, std::milli>(t1 - t0).count();
+  lastStats_["generation_time"] = generationTimeMs;
+  lastStats_["steps"]           = static_cast<double>(steps);
+  lastStats_["width"]           = static_cast<double>(width);
+  lastStats_["height"]          = static_cast<double>(height);
+  lastStats_["success"]         = success ? 1.0 : 0.0;
+  return lastStats_;
+}
+
+// ---------------------------------------------------------------------------
+// txt2img / img2img
+// ---------------------------------------------------------------------------
+
+bool SdModel::runTxt2Img(
+    const std::string& prompt,
+    const std::string& negativePrompt,
+    int width, int height,
+    int steps, float cfgScale,
+    sample_method_t sampler,
+    int64_t seed, int batchCount,
+    const GenerationJob& job) {
+  // Blocking call into the library; a null result is reported to the caller as failure.
+  sd_image_t* results = txt2img(
+      sdCtx_.get(),
+      prompt.c_str(),
+      negativePrompt.c_str(),
+      /*clip_skip=*/-1,
+      cfgScale,
+      /*guidance=*/3.5f,
+      /*eta=*/0.0f,
+      width, height,
+      sampler,
+      steps,
+      seed,
+      batchCount,
+      /*control_cond=*/nullptr,
+      /*control_strength=*/0.9f,
+      /*style_strength=*/0.2f,
+      /*normalize_input=*/false,
+      /*input_id_images_path=*/"",
+      /*skip_layers=*/nullptr,
+      /*skip_layers_count=*/0,
+      /*slg_scale=*/0.0f,
+      /*skip_layer_start=*/0.01f,
+      /*skip_layer_end=*/0.2f);
+
+  if (!results) return false;
+
+  // Every returned pixel buffer is released, including after a cancel or when nothing is
+  // delivered; only the encode-and-deliver step is conditional.
+  for (int i = 0; i < batchCount; ++i) {
+    if (results[i].data && job.outputCallback && !cancelRequested_.load()) {
+      const auto png = encodeToPng(results[i]);
+      if (!png.empty()) job.outputCallback(png);
+    }
+    free(results[i].data);
+  }
+  free(results);
+  return true;
+}
+
+bool SdModel::runImg2Img(
+    const std::string& prompt,
+    const std::string& negativePrompt,
+    const std::vector<uint8_t>& initImagePng,
+    int width, int height,
+    int steps, float cfgScale, float strength,
+    sample_method_t sampler,
+    int64_t seed, int batchCount,
+    const GenerationJob& job) {
+
+  // width/height go to img2img() independently of the decoded buffer, so a missing, undecodable
+  // or differently sized init image is rejected here (decodePng() does not resize).
+  sd_image_t initImg = decodePng(initImagePng, width, height);
+  if (!initImg.data) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        initImagePng.empty() ? "img2img requires init_image_bytes" : "Failed to decode init_image PNG");
+  }
+  if (initImg.width != static_cast<uint32_t>(width) ||
+      initImg.height != static_cast<uint32_t>(height)) {
+    free(initImg.data);
+    throw StatusError(general_error::InvalidArgument, "init_image size must match width/height");
+  }
+
+  // NOTE(review): an all-white mask is assumed to mean "regenerate everything" (plain img2img); it
+  // replaces a mask with null data, which the library may dereference — confirm against the pinned version.
+  std::vector<uint8_t> maskPixels(static_cast<std::size_t>(width) * static_cast<std::size_t>(height), 255);
+  sd_image_t maskImg{};
+  maskImg.width   = initImg.width;
+  maskImg.height  = initImg.height;
+  maskImg.channel = 1;
+  maskImg.data    = maskPixels.data();
+
+  // NOTE(review): the txt2img call in this file also passes eta/skip-layer arguments — confirm this list.
+  sd_image_t* results = img2img(
+      sdCtx_.get(), initImg, maskImg, prompt.c_str(), negativePrompt.c_str(),
+      /*clip_skip=*/-1, cfgScale, /*guidance=*/3.5f, width, height, sampler, steps, strength,
+      seed, batchCount, /*control_cond=*/nullptr, /*control_strength=*/0.9f,
+      /*style_strength=*/0.2f, /*normalize_input=*/false, /*input_id_images_path=*/"");
+
+  free(initImg.data);
+  if (!results) return false;
+
+  // Release every returned buffer, even after a cancel; only delivery is conditional.
+  for (int i = 0; i < batchCount; ++i) {
+    if (results[i].data && job.outputCallback && !cancelRequested_.load()) {
+      const auto png = encodeToPng(results[i]);
+      if (!png.empty()) job.outputCallback(png);
+    }
+    free(results[i].data);
+  }
+  free(results);
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// cancel – only raises cancelRequested_. runTxt2Img/runImg2Img read the flag
+// after the library call returns, so it gates result delivery, not sampling.
+// ---------------------------------------------------------------------------
+void SdModel::cancel() const {
+  cancelRequested_.store(true);
+}
+
+// ---------------------------------------------------------------------------
+// runtimeStats – returns a copy of lastStats_, which process() overwrites at
+// the end of each call that finishes without throwing.
+// ---------------------------------------------------------------------------
+qvac_lib_inference_addon_cpp::RuntimeStats SdModel::runtimeStats() const {
+  return lastStats_;
+}
+
+// ---------------------------------------------------------------------------
+// PNG encode / decode
+// ---------------------------------------------------------------------------
+
+// Serialises img to PNG entirely in memory: stb_image_write hands the encoded
+// bytes to a callback, which appends them to the returned vector.
+std::vector<uint8_t> SdModel::encodeToPng(const sd_image_t& img) {
+  std::vector<uint8_t> encoded;
+
+  // Capture-less so it can be passed where stb expects a plain function;
+  // the destination vector arrives through the context pointer.
+  auto appendChunk = [](void* sink, void* chunk, int chunkSize) {
+    const auto* first = static_cast<const uint8_t*>(chunk);
+    auto& dest = *static_cast<std::vector<uint8_t>*>(sink);
+    dest.insert(dest.end(), first, first + chunkSize);
+  };
+
+  const auto width = static_cast<int>(img.width);
+  const auto height = static_cast<int>(img.height);
+  const auto channels = static_cast<int>(img.channel);
+  const int rowStride = static_cast<int>(img.width * img.channel);
+  stbi_write_png_to_func(appendChunk, &encoded, width, height, channels, img.data, rowStride);
+  return encoded;
+}
+
+// Decodes encoded image bytes with stb_image, forcing 3 channels. Returns a zeroed
+// sd_image_t for empty input or on decode failure; otherwise the caller owns .data.
+// targetWidth/targetHeight are accepted but unused: the image is never resized.
+sd_image_t SdModel::decodePng(
+    const std::vector<uint8_t>& pngBytes, int targetWidth, int targetHeight) {
+  (void)targetWidth;
+  (void)targetHeight;
+  sd_image_t decoded{};
+  if (pngBytes.empty()) return decoded;
+
+  int srcWidth = 0, srcHeight = 0, srcChannels = 0;
+  uint8_t* pixels = stbi_load_from_memory(
+      pngBytes.data(), static_cast<int>(pngBytes.size()),
+      &srcWidth, &srcHeight, &srcChannels, /*desired_channels=*/3);
+  if (pixels != nullptr) {
+    decoded.width   = static_cast<uint32_t>(srcWidth);
+    decoded.height  = static_cast<uint32_t>(srcHeight);
+    decoded.channel = 3;
+    decoded.data    = pixels;
+  }
+  return decoded;
+}
+
+// ---------------------------------------------------------------------------
+// Enum parsers
+// ---------------------------------------------------------------------------
+
+// Maps a sampler name to its enum value; unrecognised names fall back to EULER_A.
+sample_method_t SdModel::parseSampler(const std::string& name) {
+  static const struct { const char* key; sample_method_t value; } kSamplers[] = {
+      {"euler_a", EULER_A},    {"euler", EULER},             {"heun", HEUN},
+      {"dpm2", DPM2},          {"dpm++_2m", DPMPP2M},        {"dpm++_2m_v2", DPMPP2Mv2},
+      {"dpm++_2s_a", DPMPP2SA}, {"lcm", LCM}};
+  for (const auto& entry : kSamplers) {
+    if (name == entry.key) return entry.value;
+  }
+  return EULER_A; // safe default
+}
+
+// Maps a weight-type name to sd_type_t. Any other string yields SD_TYPE_COUNT,
+// which this class uses as its "auto" value.
+sd_type_t SdModel::parseWeightType(const std::string& name) {
+  static const struct { const char* key; sd_type_t value; } kTypes[] = {
+      {"f32", SD_TYPE_F32},   {"f16", SD_TYPE_F16},   {"q4_0", SD_TYPE_Q4_0},
+      {"q4_1", SD_TYPE_Q4_1}, {"q5_0", SD_TYPE_Q5_0}, {"q5_1", SD_TYPE_Q5_1},
+      {"q8_0", SD_TYPE_Q8_0}};
+  for (const auto& entry : kTypes) if (name == entry.key) return entry.value;
+  return SD_TYPE_COUNT; // auto
+}
+
+// Schedule name -> enum value; unknown names select DEFAULT.
+schedule_t SdModel::parseSchedule(const std::string& name) {
+  static const struct { const char* key; schedule_t value; } kSchedules[] = {
+      {"discrete", DISCRETE}, {"karras", KARRAS}, {"exponential", EXPONENTIAL},
+      {"ays", AYS},           {"gits", GITS}};
+  for (const auto& entry : kSchedules) if (name == entry.key) return entry.value;
+  return DEFAULT;
+}
+
+// ---------------------------------------------------------------------------
+// Log callback
+// ---------------------------------------------------------------------------
+
+// Forwards library log lines to the qvac logger; levels other than DEBUG/INFO/WARN map to ERROR.
+void SdModel::sdLogCallback(
+    sd_log_level_t level, const char* text, void* /*userData*/) {
+  namespace logging = qvac_lib_inference_addon_cpp::logger;
+
+  auto priority = logging::Priority::ERROR; // SD_LOG_ERROR and any unlisted level
+  if (level == SD_LOG_DEBUG) {
+    priority = logging::Priority::DEBUG;
+  } else if (level == SD_LOG_INFO) {
+    priority = logging::Priority::INFO;
+  } else if (level == SD_LOG_WARN) {
+    priority = logging::Priority::WARNING;
+  }
+  QLOG_IF(priority, std::string(text));
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.hpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.hpp
new file mode 100644
index 0000000000..565638937d
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/model-interface/SdModel.hpp
@@ -0,0 +1,142 @@
+#pragma once
+
+#include <any>
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <stable-diffusion.h>
+
+#include <qvac-lib-inference-addon-cpp/ModelInterfaces.hpp>
+#include <qvac-lib-inference-addon-cpp/RuntimeStats.hpp>
+
+using namespace qvac_lib_inference_addon_cpp::model;
+
+/**
+ * Core stable-diffusion.cpp model wrapper.
+ *
+ * Manages the sd_ctx lifetime and exposes IModel/IModelCancel interfaces
+ * expected by the qvac-lib-inference-addon-cpp framework.
+ */
+class SdModel : public IModel, public IModelCancel {
+public:
+  SdModel(const SdModel&)            = delete;
+  SdModel& operator=(const SdModel&) = delete;
+  SdModel(SdModel&&)                 = delete;
+  SdModel& operator=(SdModel&&)      = delete;
+
+  /**
+   * @param modelPath   Path to the main model weights file (.gguf, .safetensors, .ckpt)
+   * @param clipLPath   Optional path to a separate CLIP-L text encoder
+   * @param clipGPath   Optional path to a separate CLIP-G text encoder
+   * @param t5XxlPath   Optional path to a separate T5-XXL text encoder (FLUX/SD3)
+   * @param vaePath     Optional path to a separate VAE
+   * @param configMap   Optional keys: threads, wtype, rng, schedule, clip_on_cpu, vae_on_cpu, vae_tiling, flash_attn
+   */
+  SdModel(
+      std::string modelPath,
+      std::string clipLPath,
+      std::string clipGPath,
+      std::string t5XxlPath,
+      std::string vaePath,
+      std::unordered_map<std::string, std::string> configMap);
+
+  ~SdModel() override;
+
+  std::string getName() const final { return "SdModel"; }
+
+  /**
+   * Input structure for a single generation job.
+   * Passed as std::any through the addon-cpp framework.
+   */
+  struct GenerationJob {
+    std::string paramsJson;
+
+    /** Called each diffusion step with a JSON string: {"step":N,"total":M,"elapsed_ms":T} */
+    std::function<void(const std::string&)> progressCallback;
+
+    /** Called once per image/frame with the PNG-encoded bytes */
+    std::function<void(const std::vector<uint8_t>&)> outputCallback;
+  };
+
+  /** Implements IModel::process() – runs the generation job synchronously on the worker thread. */
+  std::any process(const std::any& input) final;
+
+  /** Implements IModelCancel::cancel() – sets a flag; images that finish after it is set are not delivered. */
+  void cancel() const final;
+
+  qvac_lib_inference_addon_cpp::RuntimeStats runtimeStats() const final;
+
+  /** Static log callback forwarded to the qvac logger. */
+  static void sdLogCallback(
+      sd_log_level_t level, const char* text, void* userData);
+
+private:
+  /** Run txt2img with already-parsed params, emitting PNGs via job.outputCallback. False if the library returns no results. */
+  bool runTxt2Img(
+      const std::string& prompt,
+      const std::string& negativePrompt,
+      int width, int height,
+      int steps,
+      float cfgScale,
+      sample_method_t sampler,
+      int64_t seed,
+      int batchCount,
+      const GenerationJob& job);
+
+  /** Run img2img with already-parsed params, emitting PNGs via job.outputCallback. False if the library returns no results. */
+  bool runImg2Img(
+      const std::string& prompt,
+      const std::string& negativePrompt,
+      const std::vector<uint8_t>& initImagePng,
+      int width, int height,
+      int steps,
+      float cfgScale,
+      float strength,
+      sample_method_t sampler,
+      int64_t seed,
+      int batchCount,
+      const GenerationJob& job);
+
+  /** Encode an sd_image_t as PNG bytes using stb_image_write. */
+  static std::vector<uint8_t> encodeToPng(const sd_image_t& img);
+
+  /** Decode PNG bytes into a 3-channel sd_image_t (caller owns .data); the target size is currently ignored. */
+  static sd_image_t decodePng(
+      const std::vector<uint8_t>& pngBytes, int targetWidth, int targetHeight);
+
+  /** Parse a sampler name string into the stable-diffusion.cpp enum. */
+  static sample_method_t parseSampler(const std::string& name);
+
+  /** Parse a weight-type string into the sd_type_t enum. */
+  static sd_type_t parseWeightType(const std::string& name);
+
+  /** Parse a schedule string into the schedule_t enum. */
+  static schedule_t parseSchedule(const std::string& name);
+
+  const std::string modelPath_;
+  const std::string clipLPath_;
+  const std::string clipGPath_;
+  const std::string t5XxlPath_;
+  const std::string vaePath_;
+
+  // Configuration parsed from configMap
+  int nThreads_       = -1;
+  bool clipOnCpu_     = false;
+  bool vaeOnCpu_      = false;
+  bool vaeTiling_     = false;
+  bool flashAttn_     = false;
+  sd_type_t wtype_    = SD_TYPE_COUNT; // SD_TYPE_COUNT = auto/default
+  rng_type_t rngType_ = CUDA_RNG;
+  schedule_t schedule_ = DEFAULT;
+
+  std::unique_ptr<sd_ctx_t, decltype(&free_sd_ctx)> sdCtx_;
+  mutable std::atomic<bool> cancelRequested_{ false };
+
+  // Runtime stats updated after each job
+  mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_;
+};
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.cpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.cpp
new file mode 100644
index 0000000000..2f4987c3c1
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.cpp
@@ -0,0 +1,37 @@
+#include "BackendSelection.hpp"
+
+#include <qvac-lib-inference-addon-cpp/Errors.hpp>
+
+using qvac_errors::general_error;
+using qvac_errors::StatusError;
+
+namespace sd_backend_selection {
+
+// Map the optional "device" config entry to a BackendDevice preference.
+BackendDevice preferredDeviceFromMap(
+    const std::unordered_map<std::string, std::string>& configMap) {
+  const auto entry = configMap.find("device");
+  if (entry == configMap.end() || entry->second == "gpu") {
+    return BackendDevice::GPU; // GPU is also the default when key is absent
+  }
+  if (entry->second == "cpu") {
+    return BackendDevice::CPU;
+  }
+  // Exact, case-sensitive match only; any other value is rejected.
+  throw StatusError(
+      general_error::InvalidArgument,
+      "Invalid device value '" + entry->second + "'. Must be 'gpu' or 'cpu'.");
+}
+
+// Thread count from the "threads" key; -1 (auto) if absent or unparsable.
+int threadsFromMap(
+    const std::unordered_map<std::string, std::string>& configMap) {
+  int threadCount = -1; // stays -1 unless std::stoi succeeds below
+  const auto entry = configMap.find("threads");
+  if (entry != configMap.end()) {
+    try { threadCount = std::stoi(entry->second); } catch (...) { }
+  }
+  return threadCount;
+}
+
+} // namespace sd_backend_selection
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.hpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.hpp
new file mode 100644
index 0000000000..8f1edb42c8
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/BackendSelection.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+
+namespace sd_backend_selection {
+
+enum class BackendDevice : uint8_t { CPU, GPU };
+
+/**
+ * Parse the "device" key from a config map (accepted values: "gpu", "cpu").
+ * Returns GPU if the key is absent. Throws StatusError on any other value.
+ */
+BackendDevice preferredDeviceFromMap(
+    const std::unordered_map<std::string, std::string>& configMap);
+
+/**
+ * Read the "threads" key from a config map as an integer thread count.
+ * Returns -1 (auto) if the key is absent or its value cannot be parsed.
+ */
+int threadsFromMap(
+    const std::unordered_map<std::string, std::string>& configMap);
+
+} // namespace sd_backend_selection
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.cpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.cpp
new file mode 100644
index 0000000000..c976a97e04
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.cpp
@@ -0,0 +1,33 @@
+#include "LoggingMacros.hpp"
+
+using namespace qvac_lib_inference_addon_cpp::logger;
+
+namespace qvac_lib_inference_addon_sd {
+namespace logging {
+
+// Default to ERROR to prevent log spam before verbosity is configured
+Priority g_verbosityLevel = Priority::ERROR;
+
+void setVerbosityLevel(
+    std::unordered_map<std::string, std::string>& configMap) {
+  // Nothing to do when the caller did not supply a "verbosity" entry.
+  const auto entry = configMap.find("verbosity");
+  if (entry == configMap.end()) { return; }
+
+  Priority resolved = Priority::ERROR; // level 0, and non-numeric input
+  try {
+    const int level = std::stoi(entry->second);
+    if (level == 1) {
+      resolved = Priority::WARNING;
+    } else if (level == 2) {
+      resolved = Priority::INFO;
+    } else if (level != 0) {
+      resolved = Priority::DEBUG; // 3 and every other integer
+    }
+  } catch (...) { /* std::stoi failed: keep ERROR */ }
+  g_verbosityLevel = resolved;
+  configMap.erase(entry); // the "verbosity" key is removed from the map
+}
+
+} // namespace logging
+} // namespace qvac_lib_inference_addon_sd
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.hpp b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.hpp
new file mode 100644
index 0000000000..98a4e7a39d
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addon/src/utils/LoggingMacros.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+#include "qvac-lib-inference-addon-cpp/Logger.hpp"
+
+namespace qvac_lib_inference_addon_sd {
+namespace logging {
+
+// Global verbosity level shared across all SD model instances
+extern qvac_lib_inference_addon_cpp::logger::Priority g_verbosityLevel;
+
+/**
+ * Parse and erase "verbosity": 0=error, 1=warn, 2=info, other ints=debug,
+ * non-numeric=error. The level is left unchanged if the key is absent.
+ */
+void setVerbosityLevel(
+    std::unordered_map<std::string, std::string>& configMap);
+
+} // namespace logging
+} // namespace qvac_lib_inference_addon_sd
+
+// Calls QLOG only if priority <= global level (compared as int). NOTE: `priority` is expanded twice.
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#define QLOG_IF(priority, message)                                              \
+  do {                                                                          \
+    if (static_cast<int>(priority) <=                                           \
+        static_cast<int>(                                                       \
+            qvac_lib_inference_addon_sd::logging::g_verbosityLevel)) {         \
+      QLOG(priority, message);                                                  \
+    }                                                                           \
+  } while (0)
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.d.ts b/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.d.ts
new file mode 100644
index 0000000000..bd687d60bc
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.d.ts
@@ -0,0 +1,7 @@
+export interface AddonLogging { // shape of the object exported by addonLogging.js
+  setLogger(callback: (priority: number, message: string) => void): void
+  releaseLogger(): void
+}
+
+declare const addonLogging: AddonLogging
+export default addonLogging
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.js b/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.js
new file mode 100644
index 0000000000..479ecdf3da
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/addonLogging.js
@@ -0,0 +1,6 @@
+const binding = require('./binding')
+
+module.exports = {
+  setLogger: binding.setLogger,
+  releaseLogger: binding.releaseLogger
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/binding.js b/packages/qvac-lib-infer-stable-diffusion-cpp/binding.js
new file mode 100644
index 0000000000..cea46308c0
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/binding.js
@@ -0,0 +1 @@
+module.exports = require.addon()
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/docs/architecture.md b/packages/qvac-lib-infer-stable-diffusion-cpp/docs/architecture.md
new file mode 100644
index 0000000000..156e1421ee
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/docs/architecture.md
@@ -0,0 +1,831 @@
+# Architecture Documentation
+
+**Package:** `@qvac/img-stable-diffusion-cpp` v0.1.0  
+**Stack:** JavaScript, C++20, stable-diffusion.cpp, Bare Runtime, CMake, vcpkg  
+**License:** Apache-2.0
+
+---
+
+## Table of Contents
+
+### Overview
+- [Purpose](#purpose)
+- [Key Features](#key-features)
+- [Target Platforms](#target-platforms)
+
+### Core Architecture
+- [Package Context](#package-context)
+- [Public API](#public-api)
+- [Internal Architecture](#internal-architecture)
+- [Core Components](#core-components)
+- [Bare Runtime Integration](#bare-runtime-integration)
+
+### Architecture Decisions
+- [Decision 1: stable-diffusion.cpp as Inference Backend](#decision-1-stable-diffusioncpp-as-inference-backend)
+- [Decision 2: Bare Runtime over Node.js](#decision-2-bare-runtime-over-nodejs)
+- [Decision 3: Pluggable Data Loader Architecture](#decision-3-pluggable-data-loader-architecture)
+- [Decision 4: Incremental Buffer-Based Weight Loading](#decision-4-incremental-buffer-based-weight-loading)
+- [Decision 5: Generation Parameters Format](#decision-5-generation-parameters-format-json-serialization)
+- [Decision 6: Exclusive Run Queue](#decision-6-exclusive-run-queue-indexjs)
+- [Decision 7: TypeScript Definitions](#decision-7-typescript-definitions)
+
+### Technical Debt
+- [Limited Error Context](#1-limited-error-context)
+
+---
+
+# Overview
+
+## Purpose
+
+`@qvac/img-stable-diffusion-cpp` is a cross-platform npm package providing diffusion model inference for Bare runtime applications. It wraps stable-diffusion.cpp in a JavaScript-friendly API, enabling local image and video generation on desktop and mobile with CPU/GPU acceleration.
+
+**Core value:**
+- High-level JavaScript API for diffusion model inference
+- Peer-to-peer model distribution via Hyperdrive
+- Progress callback during generation steps
+- Text-to-image, image-to-image, and video generation
+- Pluggable model weight loaders
+
+## Key Features
+
+- **Cross-platform**: macOS, Linux, Windows, iOS, Android
+- **Multiple loaders**: Hyperdrive (P2P), filesystem, custom
+- **Progress tracking**: Step-by-step generation progress callbacks
+- **GPU acceleration**: Metal, Vulkan, CUDA, OpenCL
+- **Quantized models**: GGUF, safetensors, checkpoint formats
+- **Diffusion models**: SD1.x, SD2.x, SDXL, SD3, FLUX, Wan (video), Qwen Image, Z-Image
+- **Advanced features**: LoRA, ControlNet, ESRGAN upscaling, TAESD decoding
+- **Generation modes**: txt2img, img2img, inpainting, video generation
+
+## Target Platforms
+
+| Platform | Architecture | Min Version | Status | GPU Support |
+|----------|-------------|-------------|--------|-------------|
+| macOS | arm64, x64 | 14.0+ | ✅ Tier 1 | Metal |
+| iOS | arm64 | 17.0+ | ✅ Tier 1 | Metal |
+| Linux | arm64, x64 | Ubuntu-22+ | ✅ Tier 1 | Vulkan, CUDA |
+| Android | arm64 | 12+ | ✅ Tier 1 | Vulkan, OpenCL |
+| Windows | x64 | 10+ | ✅ Tier 1 | Vulkan, CUDA |
+
+**Dependencies:**
+- qvac-lib-inference-addon-cpp (≥1.1.2): C++ addon framework (single-job runner, runJob/activate/loadWeights/cancel/destroyInstance)
+- stable-diffusion.cpp: Diffusion inference engine
+- Bare Runtime (≥1.24.0): JavaScript runtime
+- On Ubuntu-22, g++-13 must be installed
+
+---
+
+# Core Architecture
+
+## Package Context
+
+### Ecosystem Position
+
+```mermaid
+graph TB
+    subgraph "Application Layer"
+        APP[QVAC Applications]
+    end
+    
+    subgraph "Inference Addons"
+        IMG[img-stable-diffusion-cpp<br/>Image/Video Gen]
+        LLM[llm-llamacpp<br/>LLMs]
+        EMBED[embed-llamacpp<br/>Embeddings]
+        WHISPER[whispercpp<br/>STT]
+    end
+    
+    subgraph "core libs"
+        BASE["@qvac/infer-base"]
+        DL["@qvac/dl-hyperdrive"]
+    end
+    
+    subgraph "Native Framework"
+        ADDON[addon-cpp]
+    end
+    
+    subgraph "Backend"
+        BARE[Bare Runtime]
+        SDCPP[stable-diffusion.cpp]
+    end
+    
+    APP --> IMG
+    IMG --> BASE
+    IMG --> DL
+    IMG --> ADDON
+    ADDON --> BARE
+    ADDON --> SDCPP
+    
+    style IMG fill:#e1f5ff,stroke:#0066cc,stroke-width:3px
+```
+
+<details>
+<summary>📊 LLM-Friendly: Package Relationships</summary>
+
+**Dependency Table:**
+
+| Package | Type | Version | Purpose |
+|---------|------|---------|---------|
+| @qvac/infer-base | Framework | ^0.2.0 | Base classes, WeightsProvider, QvacResponse |
+| @qvac/dl-hyperdrive | Peer | ^0.1.1 | P2P model loading |
+| qvac-lib-inference-addon-cpp | Native | ≥1.1.1 | C++ addon framework (single-job runner) |
+| stable-diffusion.cpp | Native | latest | Diffusion inference engine |
+| Bare Runtime | Runtime | ≥1.24.0 | JavaScript execution |
+
+**Integration Points:**
+
+| From | To | Mechanism | Data Format |
+|------|-----|-----------|-------------|
+| JavaScript | ImgStableDiffusion | Constructor | args, config objects |
+| ImgStableDiffusion | BaseInference | Inheritance | Template method pattern |
+| ImgStableDiffusion | SdInterface | Composition | Method calls |
+| SdInterface | C++ Addon | require.addon() | Native binding |
+| WeightsProvider | Data Loader | Interface | Stream protocol |
+
+</details>
+
+---
+
+## Public API
+
+### Main Class: ImgStableDiffusion
+
+```mermaid
+classDiagram
+    class ImgStableDiffusion {
+        +constructor(args, config)
+        +load(closeLoader, onProgress) Promise~void~
+        +txt2img(params) Promise~QvacImageResponse~
+        +img2img(params) Promise~QvacImageResponse~
+        +txt2vid(params) Promise~QvacVideoResponse~
+        +unload() Promise~void~
+        +downloadWeights(onProgress, opts) Promise~string~
+    }
+    
+    class BaseInference {
+        <<abstract>>
+        +load() Promise~void~
+        +run() Promise~QvacResponse~
+        +unload() Promise~void~
+    }
+    
+    class QvacImageResponse {
+        +onStep(callback) QvacImageResponse
+        +await() Promise~ImageResult~
+        +cancel() Promise~void~
+        +stats object
+    }
+    
+    class QvacVideoResponse {
+        +onFrame(callback) QvacVideoResponse
+        +await() Promise~VideoResult~
+        +cancel() Promise~void~
+        +stats object
+    }
+    
+    class WeightsProvider {
+        +downloadFiles(files, path, opts) Promise~void~
+        +streamFiles(shards, onChunk, onProgress) Promise~void~
+    }
+    
+    ImgStableDiffusion --|> BaseInference
+    ImgStableDiffusion *-- WeightsProvider
+    ImgStableDiffusion ..> QvacImageResponse : creates
+    ImgStableDiffusion ..> QvacVideoResponse : creates
+```
+
+<details>
+<summary>📊 LLM-Friendly: Class Responsibilities</summary>
+
+**Component Roles:**
+
+| Class | Responsibility | Lifecycle | Dependencies |
+|-------|----------------|-----------|--------------|
+| ImgStableDiffusion | Orchestrate model lifecycle, manage loading/inference | Created by user, persistent | WeightsProvider, SdInterface |
+| BaseInference | Define standard inference API | Abstract base class | None |
+| QvacImageResponse | Handle image generation progress and result | Created per txt2img/img2img call | None |
+| QvacVideoResponse | Handle video generation progress and result | Created per txt2vid call | None |
+| WeightsProvider | Abstract model weight loading | Created by ImgStableDiffusion | DataLoader |
+
+**Key Relationships:**
+
+| From | To | Type | Purpose |
+|------|-----|------|---------|
+| ImgStableDiffusion | BaseInference | Inheritance | Standard QVAC inference API |
+| ImgStableDiffusion | WeightsProvider | Composition | Model weight acquisition |
+| ImgStableDiffusion | QvacImageResponse | Creates | Progress/result per image generation |
+| ImgStableDiffusion | QvacVideoResponse | Creates | Progress/result per video generation |
+
+</details>
+
+---
+
+## Internal Architecture
+
+### Architectural Pattern
+
+The package follows a **layered architecture** with clear separation of concerns:
+
+```mermaid
+graph TB
+    subgraph "Layer 1: JavaScript API"
+        APP["Application Code"]
+        IMGCLASS["ImgStableDiffusion<br/>(index.js)"]
+        BASEINF["BaseInference<br/>(@qvac/infer-base)"]
+        WEIGHTSPR["WeightsProvider<br/>(@qvac/infer-base)"]
+        RESPONSE["QvacImageResponse<br/>QvacVideoResponse"]
+    end
+    
+    subgraph "Layer 2: Bridge"
+        SDIF["SdInterface<br/>(addon.js)"]
+        BINDING["require.addon<br/>(binding.js)"]
+    end
+    
+    subgraph "Layer 3: C++ Addon"
+        JSINTERFACE["JsInterface<br/>(addon-cpp JsInterface)"]
+        ADDONCPP["AddonCpp / AddonJs<br/>(addon-cpp + addon/AddonJs.hpp)"]
+        WEIGHTSLOAD["WeightsLoader<br/>(addon-cpp)"]
+    end
+    
+    subgraph "Layer 4: Model"
+        SDMODEL["SdModel<br/>(model-interface/SdModel.cpp)"]
+        TXT2IMG["Txt2ImgContext<br/>(model-interface/Txt2ImgContext.cpp)"]
+        IMG2IMG["Img2ImgContext<br/>(model-interface/Img2ImgContext.cpp)"]
+        VIDGEN["VideoGenContext<br/>(model-interface/VideoGenContext.cpp)"]
+    end
+    
+    subgraph "Layer 5: Backend"
+        SDCPP["stable-diffusion.cpp"]
+        GGML["GGML"]
+        GPU["GPU Backends<br/>(Metal/Vulkan/CUDA/OpenCL)"]
+    end
+    
+    APP --> IMGCLASS
+    IMGCLASS --> BASEINF
+    IMGCLASS --> WEIGHTSPR
+    IMGCLASS --> SDIF
+    IMGCLASS -.-> RESPONSE
+    
+    SDIF --> BINDING
+    BINDING --> JSINTERFACE
+    WEIGHTSPR --> WEIGHTSLOAD
+    
+    JSINTERFACE --> ADDONCPP
+    ADDONCPP --> WEIGHTSLOAD
+    ADDONCPP --> SDMODEL
+    
+    SDMODEL --> TXT2IMG
+    SDMODEL --> IMG2IMG
+    SDMODEL --> VIDGEN
+    TXT2IMG --> SDCPP
+    IMG2IMG --> SDCPP
+    VIDGEN --> SDCPP
+    
+    SDCPP --> GGML
+    GGML --> GPU
+    
+    style IMGCLASS fill:#e1f5ff
+    style ADDONCPP fill:#ffe1e1
+    style SDMODEL fill:#ffe1e1
+    style SDCPP fill:#e1ffe1
+```
+
+<details>
+<summary>📊 LLM-Friendly: Layer Responsibilities</summary>
+
+**Layer Breakdown:**
+
+| Layer | Components | Responsibility | Language | Why This Layer |
+|-------|------------|----------------|----------|----------------|
+| 1. JavaScript API | ImgStableDiffusion, BaseInference | High-level API, error handling | JS | Ergonomic API for npm consumers |
+| 2. Bridge | SdInterface, binding.js | JS↔C++ communication | JS wrapper | Lifecycle management, handle safety |
+| 3. C++ Addon | JsInterface, AddonCpp/AddonJs | Single-job runner, threading, callbacks | C++ | Performance, native integration |
+| 4. Model | SdModel, Contexts | Diffusion logic, sampling | C++ | Direct stable-diffusion.cpp integration |
+| 5. Backend | stable-diffusion.cpp, GGML | Tensor ops, GPU kernels | C++ | Optimized inference |
+
+**Data Flow Through Layers:**
+
+| Direction | Path | Data Format | Transform |
+|-----------|------|-------------|-----------|
+| Input → | JS → Bridge → Addon | JSON params | Serialize generation params |
+| Input → | Addon → Model | parsed params | Parse JSON, configure sampler |
+| Input → | Model → SD.cpp | latent tensors | Encode prompt, prepare latents |
+| Output ← | SD.cpp → Model | latent tensors | Denoise step |
+| Output ← | Model → Addon | step progress | Report progress |
+| Output ← | Addon → Bridge | progress/image | Queue output |
+| Output ← | Bridge → JS | Uint8Array (PNG) | Emit via callback |
+
+</details>
+
+---
+
+## Core Components
+
+### JavaScript Components
+
+#### **ImgStableDiffusion (index.js)**
+
+**Responsibility:** Main API class, orchestrates model lifecycle, manages data loaders
+
+**Why JavaScript:**
+- High-level API ergonomics for npm consumers
+- Promise/async-await integration
+- Event loop integration for progress callbacks
+- Configuration parsing
+
+#### **SdInterface (addon.js)**
+
+**Responsibility:** JavaScript wrapper around native addon, manages handle lifecycle
+
+**Why JavaScript:**
+- Clean JavaScript API over raw C++ bindings
+- Native handle lifecycle management
+- Type conversion between JS and native
+
+### C++ Components
+
+#### **SdModel (model-interface/SdModel.cpp)**
+
+**Responsibility:** Core diffusion implementation wrapping stable-diffusion.cpp
+
+**Why C++:**
+- Direct integration with stable-diffusion.cpp C API
+- Performance-critical diffusion loop
+- Memory-efficient tensor processing
+- Native GPU backend access
+
+#### **AddonCpp / AddonJs (addon-cpp + addon/AddonJs.hpp)**
+
+**Responsibility:** Addon-cpp framework integration; IMG addon provides createInstance and runJob over JsInterface
+
+**Why C++:**
+- Single-job runner (one job at a time; runJob returns a boolean indicating whether the job was accepted)
+- Dedicated processing thread via addon-cpp JobRunner
+- Thread-safe job submission and cancellation (IModelCancel)
+- Output dispatching via uv_async
+
+**IMG specialization:** createInstance builds SdModel with config; runJob parses generation params (prompt, negative_prompt, cfg_scale, steps, etc.)
+
+#### **WeightsProvider (@qvac/infer-base)**
+
+**Responsibility:** Abstracts model weight acquisition
+
+**Why JavaScript:**
+- Integrates with data loaders (Hyperdrive, filesystem)
+- Progress tracking and reporting
+- Handles multi-file models (UNet, VAE, CLIP, etc.)
+- Streaming chunk delivery
+
+#### **BackendSelection (utils/BackendSelection.cpp)**
+
+**Responsibility:** GPU backend selection at runtime
+
+- Selects between CPU, Metal, Vulkan, CUDA, and OpenCL backends at runtime
+- Metal compiled statically on macOS/iOS
+- CUDA available on Linux/Windows with NVIDIA GPUs
+- Vulkan as cross-platform fallback
+- OpenCL for Adreno GPUs on Android
+
+#### **SamplerManager (model-interface/SamplerManager.cpp)**
+
+**Responsibility:** Manages diffusion sampling methods
+
+- Supports multiple samplers: Euler, Euler A, Heun, DPM2, DPM++ 2M, DPM++ 2S a, LCM
+- Configurable CFG scale, steps, seed
+- Scheduler selection (Karras, linear, etc.)
+
+#### **LoraManager (model-interface/LoraManager.cpp)**
+
+**Responsibility:** LoRA weight loading and application
+
+- Loads LoRA weights from safetensors/GGUF
+- Applies LoRA to UNet and text encoder
+- Supports multiple simultaneous LoRAs with configurable weights
+
+---
+
+## Bare Runtime Integration
+
+### Communication Pattern
+
+```mermaid
+sequenceDiagram
+    participant JS as JavaScript
+    participant IF as SdInterface
+    participant Bind as Native Binding
+    participant Addon as AddonCpp/AddonJs
+    participant Model as SdModel
+    participant SD as stable-diffusion.cpp
+    
+    JS->>IF: txt2img(params)
+    IF->>Bind: runJob(handle, paramsJson)
+    Bind->>Addon: runJob(params) [lock mutex]
+    Addon->>Addon: Set job input
+    Addon->>Addon: cv.notify_one()
+    Bind-->>IF: accepted (boolean)
+    IF-->>JS: QvacImageResponse
+    
+    Note over Addon: Processing Thread
+    Addon->>Addon: Take job
+    Addon->>Addon: uv_async_send (JobStarted)
+    
+    loop For each diffusion step
+        Addon->>Model: process(params)
+        Model->>SD: sd_txt2img_step()
+        SD-->>Model: latents
+        Model->>Addon: progressCallback(step, total)
+        Addon->>Addon: Queue progress [lock]
+        Addon->>Addon: uv_async_send()
+    end
+    
+    Model->>SD: vae_decode()
+    SD-->>Model: pixel_data
+    Model->>Addon: outputCallback(image_data)
+    
+    Note over Addon: UV async callback
+    Addon->>Bind: jsOutputCallback()
+    Bind->>IF: outputCb('Output', jobId, image)
+    IF->>JS: Response emits image
+```
+
+<details>
+<summary>📊 LLM-Friendly: Thread Communication</summary>
+
+**Thread Responsibilities:**
+
+| Thread | Runs | Blocks On | Can Call |
+|--------|------|-----------|----------|
+| JavaScript | App code, callbacks | Nothing (event loop) | All JS, addon methods |
+| Processing | Diffusion steps | model.process() | model.*, uv_async_send() |
+
+**Synchronization Primitives:**
+
+| Primitive | Purpose | Held Duration | Risk |
+|-----------|---------|---------------|------|
+| std::mutex | Protect single job state | <1ms | Low (brief) |
+| std::condition_variable | Wake processing thread | N/A | None |
+| uv_async_t | Wake JS thread | N/A | None |
+
+**Thread Safety Rules:**
+
+1. ✅ Call addon methods from any thread (runJob, cancel, activate, loadWeights, destroyInstance)
+2. ✅ Processing thread calls model methods
+3. ❌ Don't call JS functions from C++ thread (use uv_async_send)
+4. ❌ Don't call model methods from JS thread
+
+</details>
+
+---
+
+# Architecture Decisions
+
+## Decision 1: stable-diffusion.cpp as Inference Backend
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** stable-diffusion.cpp over Python diffusers, ONNX Runtime, and alternatives  
+**Why:** Pure C++ implementation, GGML-based (consistent with llama.cpp), broad model support, mature cross-platform GPU acceleration  
+**Cost:** Large binary size, C++ build complexity, API instability
+
+</details>
+
+### Context
+
+Need high-performance, cross-platform diffusion model inference for resource-constrained environments (laptops, mobile devices) with support for:
+- Various model architectures (SD1.x, SD2.x, SDXL, SD3, FLUX, Wan, etc.)
+- Quantization for reduced memory footprint
+- GPU acceleration on diverse hardware
+- Both image and video generation
+
+### Decision
+
+Use stable-diffusion.cpp as the core inference engine instead of Python diffusers, ONNX Runtime, or custom implementation.
+
+### Rationale
+
+**Performance:**
+- Pure C/C++ implementation for maximum performance
+- GGML-based tensor operations (same as llama.cpp, familiar ecosystem)
+- Supports quantization reducing memory by 2-8x
+- GPU acceleration via Metal (Apple), Vulkan (cross-platform), CUDA (NVIDIA), OpenCL
+
+**Model Support:**
+- Comprehensive support for diffusion models:
+  - SD1.x, SD2.x, SD-Turbo
+  - SDXL, SDXL-Turbo
+  - SD3/SD3.5
+  - FLUX.1-dev/schnell, FLUX.2-dev/klein
+  - Wan2.1/Wan2.2 (video generation)
+  - Qwen Image, Z-Image
+- LoRA, ControlNet support
+- GGUF, safetensors, checkpoint format support
+
+**Development Velocity:**
+- Active development with regular releases
+- Community adding new model support rapidly
+- Mirrors llama.cpp architecture (familiar patterns)
+
+### Alternatives Considered
+
+1. **Python Diffusers (Hugging Face)**
+   - ✅ Comprehensive model support
+   - ✅ Easy to use
+   - ❌ Requires Python runtime
+   - ❌ Heavy memory footprint
+   - ❌ Poor mobile support
+   - ❌ Complex deployment
+
+2. **ONNX Runtime**
+   - ✅ Cross-platform
+   - ✅ Good mobile support
+   - ❌ Requires model conversion
+   - ❌ Limited quantization support
+   - ❌ No native LoRA/ControlNet support
+   - ❌ Complex pipeline orchestration
+
+3. **TensorRT (NVIDIA)**
+   - ✅ Excellent NVIDIA GPU performance
+   - ❌ NVIDIA-only (no AMD, Apple, mobile)
+   - ❌ Requires model compilation per GPU
+   - ❌ Large binary size
+
+4. **Core ML (Apple)**
+   - ✅ Excellent Apple device performance
+   - ❌ Apple-only
+   - ❌ Limited model support
+   - ❌ Requires model conversion
+
+**Why stable-diffusion.cpp Won:**
+- Broadest platform support (desktop + mobile, all major OSes)
+- Pure C++ with no external runtime dependencies
+- GGML integration (consistent with our llama.cpp stack)
+- Active development and growing model support
+- Multiple GPU backends in single codebase
+- Quantization support for memory efficiency
+
+---
+
+## Decision 2: Bare Runtime over Node.js
+
+See [qvac-lib-inference-addon-cpp Decision 4: Why Bare Runtime](https://github.com/tetherto/qvac-lib-inference-addon-cpp/blob/main/docs/architecture.md#decision-4-why-bare-runtime) for rationale.
+
+**Summary:** Mobile support (iOS/Android), lightweight, modern addon API. Core business logic remains runtime-agnostic.
+
+---
+
+## Decision 3: Pluggable Data Loader Architecture
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Abstract data loading via WeightsProvider interface  
+**Why:** Support multiple distribution methods (P2P, HTTP, local files, S3)  
+**Cost:** Additional abstraction layer, must implement loader interface
+
+</details>
+
+### Context
+
+Need to load multi-GB model files from various sources:
+- Local filesystem (for offline/development)
+- P2P networks (for privacy/decentralization)
+- HTTP/CDN (for enterprise deployments)
+- Cloud storage (S3, Azure Blob, etc.)
+
+Diffusion models typically consist of multiple components (UNet, VAE, CLIP text encoders, safety checker) that may be distributed separately.
+
+### Decision
+
+Create a pluggable data loader abstraction (WeightsProvider interface) that decouples model loading from the inference engine, allowing applications to choose their distribution strategy.
+
+### Rationale
+
+**Flexibility:**
+- Different users have different distribution needs
+- Enterprise may require HTTP/CDN, privacy users may prefer P2P
+- Development/testing needs local filesystem access
+
+**Multi-Component Models:**
+- Diffusion models have multiple weight files (UNet, VAE, text encoder)
+- LoRA weights loaded separately
+- ControlNet models as add-ons
+- Loader abstraction handles all components uniformly
+
+**Extensibility:**
+- Applications can implement custom loaders
+- Future-proof: new distribution methods don't require engine changes
+
+### Trade-offs
+- ✅ Can mock loaders for unit testing
+- ❌ Additional abstraction complexity
+- ❌ Applications must choose/implement their loader
+
+---
+
+## Decision 4: Incremental Buffer-Based Weight Loading
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Buffer-based weight loader using custom std::streambuf over JavaScript ArrayBuffers  
+**Why:** Avoids storage duplication, enables zero-copy reads, and supports loading from P2P sources  
+**Cost:** Complex streambuf implementation, JavaScript reference lifecycle management
+
+</details>
+
+### Context
+
+Diffusion models are large (2-10+ GB). stable-diffusion.cpp expects weight data as files or buffers. Loading directly from Hyperdrive (P2P) without duplicating to disk is essential for mobile devices with limited storage.
+
+### Decision
+
+Implement custom `std::streambuf` over JavaScript-owned ArrayBuffers with incremental chunk loading, as provided by `qvac-lib-inference-addon-cpp` framework.
+
+### Rationale
+
+**Avoid Storage Duplication:**
+- Load directly from Hyperdrive streams without saving to disk
+- No temporary files consuming additional storage
+- Critical for mobile devices with limited storage
+
+**Zero-Copy:**
+- C++ reads directly from JavaScript ArrayBuffer memory
+- No memcpy of multi-GB model files
+
+**Component Loading:**
+- Load UNet, VAE, CLIP sequentially
+- Report progress per component
+- Handle optional components (LoRA, ControlNet) dynamically
+
+### Trade-offs
+- ✅ Can report loading progress per component
+- ❌ Complex streambuf implementation
+- ❌ Must keep JS buffers alive during load
+
+---
+
+## Decision 5: Generation Parameters Format (JSON Serialization)
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Serialize generation parameters to JSON string before crossing JS/C++ boundary  
+**Why:** Simple marshalling, familiar pattern, extensible for new parameters  
+**Cost:** JSON parsing overhead per inference call
+
+</details>
+
+### Context
+
+Need to pass complex generation parameters from JavaScript to C++:
+- Prompt and negative prompt
+- Image dimensions (width, height)
+- Sampling parameters (steps, cfg_scale, sampler, seed)
+- Optional inputs (init image for img2img, LoRA configs, ControlNet)
+
+### Decision
+
+Serialize generation parameters to JSON string before passing to C++.
+
+### Rationale
+
+**Simplicity:**
+- Single string parameter instead of complex nested objects
+- JSON parsing well-supported in both JavaScript and C++
+- Consistent with llm-llamacpp pattern
+
+**Extensibility:**
+- Easy to add new parameters without changing C++ interface
+- Optional parameters naturally handled (absent = default)
+- LoRA configs, ControlNet settings as nested objects
+
+### Trade-offs
+- ✅ Portable and well-understood format
+- ❌ Serialization overhead on every call
+- ❌ No compile-time type checking across boundary
+
+### Parameter Schema
+
+```typescript
+interface Txt2ImgParams {
+  prompt: string;
+  negative_prompt?: string;
+  width?: number;           // default: 512
+  height?: number;          // default: 512
+  steps?: number;           // default: 20
+  cfg_scale?: number;       // default: 7.0
+  sampler?: string;         // 'euler_a' | 'euler' | 'dpm++_2m' | etc.
+  seed?: number;            // -1 for random
+  batch_count?: number;     // default: 1
+  loras?: LoraConfig[];
+  controlnet?: ControlNetConfig;
+}
+
+interface Img2ImgParams extends Txt2ImgParams {
+  init_image: Uint8Array;   // PNG/JPEG bytes
+  strength?: number;        // 0.0-1.0, default: 0.75
+}
+
+interface Txt2VidParams {
+  prompt: string;
+  negative_prompt?: string;
+  width?: number;
+  height?: number;
+  frames?: number;
+  fps?: number;
+  steps?: number;
+  cfg_scale?: number;
+  seed?: number;
+}
+```
+
+---
+
+## Decision 6: Exclusive Run Queue (index.js)
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Promise-based exclusive run queue using `_withExclusiveRun()` wrapper  
+**Why:** Ensure generation jobs complete without interruption (long-running operations)  
+**Cost:** One generation at a time per model instance
+
+</details>
+
+### Context
+
+Diffusion generation takes significant time (seconds to minutes). Without coordination, concurrent requests could interfere. The addon returns `false` (not accepted) if a job is already running.
+
+### Decision
+
+Implement JavaScript-level promise queue ensuring only one generation job runs at a time per model instance.
+
+### Rationale
+
+**Resource Management:**
+- GPU memory fully utilized during generation
+- No partial state from interrupted generations
+- Predictable VRAM usage
+
+**Progress Integrity:**
+- Step progress callbacks correspond to single job
+- No mixing of progress from concurrent requests
+
+### Trade-offs
+- ✅ Simple promise-based queue
+- ✅ Predictable execution order
+- ❌ One request at a time per instance
+- ❌ Long generations block subsequent requests
+
+**Mitigation:** For batch generation, use the `batch_count` parameter; for parallel jobs, create multiple model instances.
+
+---
+
+## Decision 7: TypeScript Definitions
+
+<details>
+<summary>⚡ TL;DR</summary>
+
+**Chose:** Hand-written TypeScript definitions (index.d.ts)  
+**Why:** Type safety, IDE support, API documentation  
+**Cost:** Manual maintenance, must keep in sync with implementation
+
+</details>
+
+### Context
+
+Developers expect TypeScript support for better IDE experience, autocomplete, and compile-time checking.
+
+### Decision
+
+Provide hand-written TypeScript definitions in `index.d.ts`.
+
+### Rationale
+
+**Developer Experience:**
+- IDE autocomplete for methods and parameters
+- Compile-time error checking
+- Clear parameter types for generation options
+
+**Documentation:**
+- Types serve as living API documentation
+- Clear contracts for all public methods
+
+### Trade-offs
+- ✅ Catch errors at compile time
+- ❌ Maintenance burden (must keep .d.ts in sync)
+
+---
+
+# Technical Debt
+
+### 1. Limited Error Context
+**Status:** C++ exceptions lose stack traces crossing JS boundary  
+**Issue:** Generic error messages make debugging difficult  
+**Root Cause:** Bare's `js.h` doesn't support error stacks  
+**Plan:** Implement structured error objects with error codes and context
+
+---
+
+**Last Updated:** 2026-02-23
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/docs/data-flows-detailed.md b/packages/qvac-lib-infer-stable-diffusion-cpp/docs/data-flows-detailed.md
new file mode 100644
index 0000000000..7620c6bb1f
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/docs/data-flows-detailed.md
@@ -0,0 +1,167 @@
+# Detailed Data Flows
+
+This document contains detailed diagrams showing how data moves through the `@qvac/img-stable-diffusion-cpp` system.
+
+**Audience:** Developers debugging complex behavior, contributors understanding system interactions.
+
+> **⚠️ Note:** These detailed diagrams are intended for initial reference and can quickly become outdated as the codebase evolves. For exact debugging and deep understanding, regenerate diagrams from the actual code or trace through the implementation directly.
+
+<details>
+<summary>⚡ TL;DR: Data Flow Overview</summary>
+
+**Communication Pattern:**
+- Two-thread architecture: JavaScript thread + dedicated C++ processing thread
+- Synchronization via mutex and condition variables
+- Cross-thread flow: JS → submit job via `runJob(params)` → wake C++ → process diffusion steps → output → uv_async_send → JS callback
+
+**Generation Path:**
+- JS calls `txt2img(params)` → returns QvacImageResponse immediately (non-blocking)
+- JS serializes params to JSON, calls `addon.runJob(paramsJson)` once; returns boolean (accepted or job already active)
+- C++ single-job runner takes the job, executes diffusion loop → generates image
+- Queues progress/output events → triggers JS callback asynchronously
+- Emits: StepProgress, Output (final image), JobStarted, JobEnded, Error
+
+</details>
+
+## Table of Contents
+
+- [Text-to-Image Generation Flow](#text-to-image-generation-flow)
+
+---
+
+## Text-to-Image Generation Flow
+
+### High-Level Flow
+
+```mermaid
+flowchart TD
+    Start([JS: model.txt2img]) --> ParseParams[Parse generation params]
+    ParseParams --> SerializeJSON[Serialize to JSON]
+    
+    SerializeJSON --> RunJob["addon.runJob(paramsJson)"]
+    RunJob --> CreateResp[Create QvacImageResponse]
+    CreateResp --> ReturnJS([Return to JavaScript])
+    
+    RunJob -.->|Enters native| LockMutex[Lock mutex]
+    LockMutex --> SetJob[Set single job input]
+    SetJob --> NotifyCV[Notify condition variable]
+    NotifyCV --> UnlockMutex[Unlock mutex]
+    
+    NotifyCV -.->|Wakes| ProcThread[Processing Thread]
+    
+    ProcThread --> WaitWork{Has work?}
+    WaitWork -->|No| SleepCV[cv.wait]
+    SleepCV --> WaitWork
+    
+    WaitWork -->|Yes| LockProc[Lock mutex]
+    LockProc --> TakeJob[Take job input]
+    TakeJob --> UnlockProc[Unlock mutex]
+    UnlockProc --> EmitStart[Queue JobStarted event]
+    EmitStart --> SendAsync1[uv_async_send]
+    
+    SendAsync1 --> ParseJSON[Parse JSON params]
+    ParseJSON --> EncodePrompt["Encode prompt (CLIP)"]
+    EncodePrompt --> EncodeNeg[Encode negative prompt]
+    EncodeNeg --> InitLatents["Initialize random latents (seed)"]
+    
+    InitLatents --> DiffusionLoop{Diffusion Loop}
+    DiffusionLoop -->|Continue| PredictNoise[UNet predict noise]
+    PredictNoise --> ApplyCFG[Apply CFG guidance]
+    ApplyCFG --> SchedulerStep[Scheduler step]
+    SchedulerStep --> QueueProgress[Queue StepProgress event]
+    QueueProgress --> SendAsync2[uv_async_send]
+    SendAsync2 --> DiffusionLoop
+    
+    DiffusionLoop -->|Complete| VAEDecode[VAE decode]
+    VAEDecode --> EncodePNG[Encode to PNG]
+    EncodePNG --> QueueOutput[Queue Output event]
+    QueueOutput --> GetStats[Collect runtime stats]
+    GetStats --> QueueJobEnd[Queue JobEnded event]
+    QueueJobEnd --> SendAsync3[uv_async_send]
+    SendAsync3 --> ProcThread
+    
+    DiffusionLoop -->|Error| QueueError[Queue Error event]
+    QueueError --> ResetModel[model.reset]
+    ResetModel --> SendAsync3
+    
+    SendAsync2 -.->|Triggers| UVCallback[UV async callback]
+    UVCallback --> LockCB[Lock output mutex]
+    LockCB --> DequeueOutputs[Dequeue all outputs]
+    DequeueOutputs --> UnlockCB[Unlock mutex]
+    UnlockCB --> ForEach[For each output event]
+    
+    ForEach --> InvokeJS[Call JavaScript outputCb]
+    InvokeJS --> UpdateResponse[QvacImageResponse emits]
+    UpdateResponse --> ProgressYield([onStep callback / await])
+```
+
+<details>
+<summary>📊 LLM-Friendly: Generation Flow Breakdown</summary>
+
+**Phase 1: Job Submission (JavaScript → C++)**
+
+| Step | Thread | Duration | Operation | Blocking? |
+|------|--------|----------|-----------|-----------|
+| 1 | JS | <0.1ms | Parse params | No |
+| 2 | JS | <0.1ms | Serialize to JSON | No |
+| 3 | JS | <1ms | Call addon.runJob(params) | No |
+| 4 | JS | <0.1ms | Lock mutex | No |
+| 5 | JS | <0.1ms | Set job input | No |
+| 6 | JS | <0.1ms | Signal CV | No |
+| 7 | JS | <0.1ms | Unlock mutex | No |
+| 8 | JS | <0.1ms | Return accepted (boolean) | No |
+| 9 | C++ | - | Wake from cv.wait() | - |
+
+**Phase 2: Processing (C++ Background Thread)**
+
+| Step | Thread | Duration | Operation | Blocks JS? |
+|------|--------|----------|-----------|------------|
+| 10 | C++ | <0.1ms | Lock mutex | No |
+| 11 | C++ | <0.1ms | Take job input | No |
+| 12 | C++ | <0.1ms | Unlock mutex | No |
+| 13 | C++ | <1ms | Parse JSON params | No |
+| 14 | C++ | 50-200ms | Encode prompts (CLIP) | No |
+| 15 | C++ | <10ms | Initialize latents | No |
+| 16 | C++ | 100-500ms per step | UNet inference | No |
+| 17 | C++ | 200-1000ms | VAE decode | No |
+| 18 | C++ | 10-50ms | PNG encode | No |
+
+**Phase 3: Output Delivery (C++ → JavaScript)**
+
+| Step | Thread | Duration | Operation | Details |
+|------|--------|----------|-----------|---------|
+| 19 | C++ | <0.1ms | Lock output mutex | Per step |
+| 20 | C++ | <0.1ms | Queue progress | Per step |
+| 21 | C++ | <0.1ms | Unlock mutex | Per step |
+| 22 | C++ | <0.1ms | uv_async_send() | May coalesce |
+| 23 | JS | - | UV schedules callback | Next tick |
+| 24 | JS | <0.1ms | Lock mutex | Batch |
+| 25 | JS | <0.1ms | Drain outputs | Batch |
+| 26 | JS | <0.1ms | Unlock mutex | Batch |
+| 27 | JS | Varies | Invoke outputCb | User code |
+
+**Event Types:**
+
+| Event | When | Data | Purpose |
+|-------|------|------|---------|
+| JobStarted | Processing begins | {jobId, timestamp} | Track start |
+| StepProgress | Each diffusion step | {jobId, step, total} | Progress UI |
+| Output | Generation complete | {jobId, image: Uint8Array, format: 'png'} | Final image |
+| JobEnded | All processing done | {jobId, stats: RuntimeStats} | Track completion |
+| Error | Processing fails | {jobId, error: string} | Error handling |
+
+**Performance Characteristics:**
+
+- Job queueing: <1ms total
+- Prompt encoding: 50-200ms (depends on prompt length)
+- Diffusion steps: 100-500ms per step (model and GPU dependent)
+- VAE decoding: 200-1000ms (resolution dependent)
+- Total 512x512, 20 steps: ~5-15 seconds
+- Total 1024x1024, 20 steps: ~15-60 seconds
+
+</details>
+
+**Related Documents:**
+- [architecture.md](architecture.md) - Complete architecture documentation
+
+**Last Updated:** 2026-02-23
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/index.d.ts b/packages/qvac-lib-infer-stable-diffusion-cpp/index.d.ts
new file mode 100644
index 0000000000..29249239bd
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/index.d.ts
@@ -0,0 +1,189 @@
+import BaseInference, {
+  ReportProgressCallback
+} from '@qvac/infer-base/WeightsProvider/BaseInference'
+import type { QvacResponse } from '@qvac/infer-base'
+import type QvacLogger from '@qvac/logging'
+
+/**
+ * A numeric value given either as a number or as a string that spells a
+ * number (for example `4` or `'4'`).
+ */
+export type NumericLike = number | `${number}`
+
+/**
+ * Contract for the weight loader supplied as `ImgStableDiffusionArgs.loader`.
+ * Only the shape is declared here; the implementation comes from the caller.
+ */
+export interface Loader {
+  ready(): Promise<void>
+  close(): Promise<void>
+  /** Resolves to an async iterable of byte chunks for `path`. */
+  getStream(path: string): Promise<AsyncIterable<Uint8Array>>
+  /**
+   * Starts a download of `path`; the resolved handle exposes `await()`.
+   * NOTE(review): presumably `opts.diskPath` is the local destination
+   * directory — confirm against the loader implementation.
+   */
+  download(
+    path: string,
+    opts: { diskPath: string; progressReporter?: unknown }
+  ): Promise<{ await(): Promise<void> }>
+  /** Optional. Resolves to a number for `path` (presumably a size in bytes — confirm). */
+  getFileSize?(path: string): Promise<number>
+}
+
+/**
+ * Shape of the addon object stored in `ImgStableDiffusion.addon`.
+ */
+export interface Addon {
+  activate(): Promise<void>
+  /** Submits one generation job; resolves to `false` when the job was not accepted. */
+  runJob(params: GenerationParams): Promise<boolean>
+  cancel(): Promise<void>
+  unload(): Promise<void>
+}
+
+/**
+ * Supported diffusion sampling methods.
+ * Accepted by the optional `sampler` field of the generation parameter types.
+ */
+export type SamplerMethod =
+  | 'euler_a'
+  | 'euler'
+  | 'heun'
+  | 'dpm2'
+  | 'dpm++_2m'
+  | 'dpm++_2m_v2'
+  | 'dpm++_2s_a'
+  | 'lcm'
+
+/**
+ * Supported weight quantization types.
+ * Accepted by `SdConfig.wtype`.
+ */
+export type WeightType =
+  | 'default'
+  | 'f32'
+  | 'f16'
+  | 'q4_0'
+  | 'q4_1'
+  | 'q5_0'
+  | 'q5_1'
+  | 'q8_0'
+
+/**
+ * Supported RNG types.
+ * Accepted by `SdConfig.rng`.
+ */
+export type RngType = 'cuda' | 'cpu'
+
+/**
+ * Supported sampling schedules.
+ * Accepted by `SdConfig.schedule`.
+ */
+export type ScheduleType = 'default' | 'discrete' | 'karras' | 'exponential' | 'ays' | 'gits'
+
+/**
+ * Context configuration passed as the second constructor argument of
+ * `ImgStableDiffusion`. Every named field is optional.
+ */
+export interface SdConfig {
+  /** Number of CPU threads (-1 = auto) */
+  threads?: NumericLike
+  /** Preferred compute device: 'gpu' or 'cpu' */
+  device?: 'gpu' | 'cpu'
+  /** Weight quantization type */
+  wtype?: WeightType
+  /** RNG type for reproducible generation */
+  rng?: RngType
+  /** Sampling schedule */
+  schedule?: ScheduleType
+  /** Run CLIP encoder on CPU even when GPU is available */
+  clip_on_cpu?: boolean
+  /** Run VAE decoder on CPU even when GPU is available */
+  vae_on_cpu?: boolean
+  /** Enable VAE tiling to reduce VRAM usage */
+  vae_tiling?: boolean
+  /** Enable flash attention for memory efficiency */
+  flash_attn?: boolean
+  /** Logging verbosity: 0=error, 1=warn, 2=info, 3=debug */
+  verbosity?: NumericLike
+  /** Any additional key is allowed as long as its value is a string, number or boolean. */
+  [key: string]: string | number | boolean | undefined
+}
+
+/**
+ * Full job description accepted by `Addon.runJob`. It is the union of the
+ * per-mode parameter sets plus the `mode` discriminator; `mode` and `prompt`
+ * are the only required fields.
+ */
+export interface GenerationParams {
+  mode: 'txt2img' | 'img2img' | 'txt2vid'
+  prompt: string
+  negative_prompt?: string
+  width?: number
+  height?: number
+  steps?: number
+  cfg_scale?: number
+  sampler?: SamplerMethod
+  seed?: number
+  batch_count?: number
+  /** img2img only: input image as PNG/JPEG bytes */
+  init_image?: Uint8Array
+  /** img2img only: denoising strength (0.0–1.0) */
+  strength?: number
+  /** txt2vid only: number of frames */
+  frames?: number
+  /** txt2vid only: frames per second */
+  fps?: number
+}
+
+/**
+ * Parameters accepted by `ImgStableDiffusion.txt2img()`.
+ * Only `prompt` is required.
+ */
+export interface Txt2ImgParams {
+  prompt: string
+  negative_prompt?: string
+  width?: number
+  height?: number
+  steps?: number
+  cfg_scale?: number
+  sampler?: SamplerMethod
+  seed?: number
+  batch_count?: number
+}
+
+/**
+ * Parameters accepted by `ImgStableDiffusion.img2img()`: everything from
+ * `Txt2ImgParams` plus the required input image.
+ */
+export interface Img2ImgParams extends Txt2ImgParams {
+  /** Required input image bytes; `img2img()` throws when it is missing. */
+  init_image: Uint8Array
+  /** Optional denoising strength. */
+  strength?: number
+}
+
+/**
+ * Parameters accepted by `ImgStableDiffusion.txt2vid()`.
+ * Only `prompt` is required; unlike `Txt2ImgParams` there is no `batch_count`.
+ */
+export interface Txt2VidParams {
+  prompt: string
+  negative_prompt?: string
+  width?: number
+  height?: number
+  frames?: number
+  fps?: number
+  steps?: number
+  cfg_scale?: number
+  sampler?: SamplerMethod
+  seed?: number
+}
+
+/**
+ * First constructor argument of `ImgStableDiffusion`.
+ * `loader` and `modelName` are required; all model file names are joined
+ * with `diskPath` when the addon is configured.
+ */
+export interface ImgStableDiffusionArgs {
+  /** Loader used to obtain the weight files. */
+  loader: Loader
+  /** Logger; the implementation defaults this to `null`. */
+  logger?: QvacLogger | Console | null
+  /** Inference options; the implementation defaults this to `{}`. */
+  opts?: { stats?: boolean }
+  /** Local directory for weight files; the implementation defaults this to `'.'`. */
+  diskPath?: string
+  /** Main model file name. */
+  modelName: string
+  /** Optional CLIP-L model file name. */
+  clipLModel?: string
+  /** Optional CLIP-G model file name. */
+  clipGModel?: string
+  /** Optional T5-XXL text encoder file name. */
+  t5XxlModel?: string
+  /** Optional VAE file name. */
+  vaeModel?: string
+}
+
+/**
+ * Options for `ImgStableDiffusion.downloadWeights()`.
+ */
+export interface DownloadWeightsOptions {
+  /** Forwarded unchanged to the weights provider as its `closeLoader` option. */
+  closeLoader?: boolean
+}
+
+/**
+ * Value type of the record resolved by `ImgStableDiffusion.downloadWeights()`.
+ * NOTE(review): presumably one entry per requested weight file, keyed by file
+ * name — confirm against the weights provider.
+ */
+export interface DownloadResult {
+  filePath: string | null
+  error: boolean
+  completed: boolean
+}
+
+/**
+ * Progress payload for a generation step. The JavaScript layer classifies an
+ * addon message as step progress when its parsed JSON has both `step` and
+ * `total`.
+ */
+export interface StepProgressEvent {
+  step: number
+  total: number
+  elapsed_ms?: number
+}
+
+/**
+ * Image and video generation front-end built on `BaseInference`.
+ */
+export default class ImgStableDiffusion extends BaseInference {
+  /** Addon instance; assigned during `_load()`. */
+  protected addon: Addon
+
+  /**
+   * @param args - loader, logger, disk location and model file names
+   * @param config - context configuration handed to the addon
+   */
+  constructor(args: ImgStableDiffusionArgs, config: SdConfig)
+
+  /** Downloads the weight files, creates the addon and activates it. */
+  _load(
+    closeLoader?: boolean,
+    onDownloadProgress?: ReportProgressCallback | ((bytes: number) => void)
+  ): Promise<void>
+
+  load(
+    closeLoader?: boolean,
+    onDownloadProgress?: ReportProgressCallback | ((bytes: number) => void)
+  ): Promise<void>
+
+  downloadWeights(
+    onDownloadProgress?: (progress: Record<string, any>, opts: DownloadWeightsOptions) => any,
+    opts?: DownloadWeightsOptions
+  ): Promise<Record<string, DownloadResult>>
+
+  /** Starts a job with `mode: 'txt2img'`. */
+  txt2img(params: Txt2ImgParams): Promise<QvacResponse>
+
+  /** Starts a job with `mode: 'img2img'`; throws when `init_image` is missing. */
+  img2img(params: Img2ImgParams): Promise<QvacResponse>
+
+  /** Starts a job with `mode: 'txt2vid'`. */
+  txt2vid(params: Txt2VidParams): Promise<QvacResponse>
+
+  /** Cancels the current job, fails its pending response and unloads the model. */
+  unload(): Promise<void>
+
+  /** Cancels the current job if an addon is present. */
+  cancel(): Promise<void>
+}
+
+export { ReportProgressCallback, QvacResponse }
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/index.js b/packages/qvac-lib-infer-stable-diffusion-cpp/index.js
new file mode 100644
index 0000000000..3c7f00ea7e
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/index.js
@@ -0,0 +1,263 @@
+'use strict'
+
+const path = require('bare-path')
+
+const BaseInference = require('@qvac/infer-base/WeightsProvider/BaseInference')
+const WeightsProvider = require('@qvac/infer-base/WeightsProvider/WeightsProvider')
+const { SdInterface } = require('./addon')
+
+const noop = () => {}
+
+/** Max ms to wait for the previous job to finish before throwing. */
+const PREVIOUS_JOB_WAIT_MS = 30
+const RUN_BUSY_ERROR_MESSAGE = 'Cannot set new job: a job is already set or being processed'
+
+/**
+ * Image and video generation using stable-diffusion.cpp.
+ * Supports SD1.x, SD2.x, SDXL, SD3, FLUX, Wan2.x video models.
+ */
+class ImgStableDiffusion extends BaseInference {
+  /**
+   * @param {object} args
+   * @param {object} args.loader - Data loader (Hyperdrive, filesystem, etc.)
+   * @param {object} [args.logger] - Structured logger
+   * @param {object} [args.opts] - Optional inference options
+   * @param {string} [args.diskPath='.'] - Local directory for downloaded weights
+   * @param {string} args.modelName - Model file name (e.g. 'flux1-dev-q4_0.gguf')
+   * @param {string} [args.clipLModel] - Optional CLIP-L model file name
+   * @param {string} [args.clipGModel] - Optional CLIP-G model file name
+   * @param {string} [args.t5XxlModel] - Optional T5-XXL text encoder file name
+   * @param {string} [args.vaeModel] - Optional VAE file name
+   * @param {object} config - SD context configuration (threads, device, wtype, etc.)
+   */
+  constructor (
+    {
+      opts = {},
+      loader,
+      logger = null,
+      diskPath = '.',
+      modelName,
+      clipLModel,
+      clipGModel,
+      t5XxlModel,
+      vaeModel
+    },
+    config
+  ) {
+    super({ logger, opts })
+    this._config = config
+    this._diskPath = diskPath
+    this._modelName = modelName
+    this._clipLModel = clipLModel || null
+    this._clipGModel = clipGModel || null
+    this._t5XxlModel = t5XxlModel || null
+    this._vaeModel = vaeModel || null
+    this.weightsProvider = new WeightsProvider(loader, this.logger)
+    this._lastJobResult = Promise.resolve()
+  }
+
+  /**
+   * Load model weights, initialize the native addon, and activate.
+   * @param {boolean} [closeLoader=true]
+   * @param {Function} [onDownloadProgress]
+   */
+  async _load (closeLoader = true, onDownloadProgress = noop) {
+    this.logger.info('Starting stable-diffusion model load')
+
+    try {
+      const filesToDownload = [this._modelName]
+      if (this._clipLModel) filesToDownload.push(this._clipLModel)
+      if (this._clipGModel) filesToDownload.push(this._clipGModel)
+      if (this._t5XxlModel) filesToDownload.push(this._t5XxlModel)
+      if (this._vaeModel) filesToDownload.push(this._vaeModel)
+
+      await this.weightsProvider.downloadFiles(filesToDownload, this._diskPath, {
+        closeLoader,
+        onDownloadProgress
+      })
+
+      const configurationParams = {
+        path: path.join(this._diskPath, this._modelName),
+        clipLPath: this._clipLModel ? path.join(this._diskPath, this._clipLModel) : '',
+        clipGPath: this._clipGModel ? path.join(this._diskPath, this._clipGModel) : '',
+        t5XxlPath: this._t5XxlModel ? path.join(this._diskPath, this._t5XxlModel) : '',
+        vaePath: this._vaeModel ? path.join(this._diskPath, this._vaeModel) : '',
+        config: this._config
+      }
+
+      this.logger.info('Creating stable-diffusion addon with configuration:', configurationParams)
+      this.addon = this._createAddon(configurationParams)
+
+      this.logger.info('Activating stable-diffusion addon')
+      await this.addon.activate()
+
+      this.logger.info('Stable-diffusion model load completed successfully')
+    } catch (error) {
+      this.logger.error('Error during stable-diffusion model load:', error)
+      throw error
+    }
+  }
+
+  /**
+   * @param {Function} [onDownloadProgress]
+   * @param {object} [opts]
+   */
+  async _downloadWeights (onDownloadProgress, opts) {
+    const filesToDownload = [this._modelName]
+    if (this._clipLModel) filesToDownload.push(this._clipLModel)
+    if (this._clipGModel) filesToDownload.push(this._clipGModel)
+    if (this._t5XxlModel) filesToDownload.push(this._t5XxlModel)
+    if (this._vaeModel) filesToDownload.push(this._vaeModel)
+
+    return this.weightsProvider.downloadFiles(filesToDownload, this._diskPath, {
+      closeLoader: opts.closeLoader,
+      onDownloadProgress
+    })
+  }
+
+  /**
+   * @param {object} configurationParams
+   * @returns {SdInterface}
+   */
+  _createAddon (configurationParams) {
+    const binding = require('./binding')
+    return new SdInterface(
+      binding,
+      configurationParams,
+      this._addonOutputCallback.bind(this)
+    )
+  }
+
+  _addonOutputCallback (addon, event, data, error) {
+    if (typeof data === 'object' && data !== null && 'generation_time' in data) {
+      return this._outputCallback(addon, 'JobEnded', 'OnlyOneJob', data, null)
+    }
+
+    let mappedEvent = event
+    if (event.includes('Error')) {
+      mappedEvent = 'Error'
+    } else if (data instanceof Uint8Array) {
+      mappedEvent = 'Output'
+    } else if (typeof data === 'string') {
+      try {
+        const parsed = JSON.parse(data)
+        if ('step' in parsed && 'total' in parsed) {
+          mappedEvent = 'StepProgress'
+        }
+      } catch (_) {
+        mappedEvent = 'Output'
+      }
+    }
+
+    return this._outputCallback(addon, mappedEvent, 'OnlyOneJob', data, error)
+  }
+
+  /**
+   * Cancel the current generation job.
+   */
+  async cancel () {
+    if (this.addon?.cancel) {
+      await this.addon.cancel()
+    }
+  }
+
+  /**
+   * Unload the model and release all resources.
+   */
+  async unload () {
+    return this._withExclusiveRun(async () => {
+      await this.cancel()
+      const currentJobResponse = this._jobToResponse.get('OnlyOneJob')
+      if (currentJobResponse) {
+        currentJobResponse.failed(new Error('Model was unloaded'))
+        this._deleteJobMapping('OnlyOneJob')
+      }
+      await super.unload()
+    })
+  }
+
+  /**
+   * Generate an image from text.
+   * @param {object} params - Generation parameters
+   * @param {string} params.prompt
+   * @param {string} [params.negative_prompt]
+   * @param {number} [params.width=512]
+   * @param {number} [params.height=512]
+   * @param {number} [params.steps=20]
+   * @param {number} [params.cfg_scale=7.0]
+   * @param {string} [params.sampler='euler_a']
+   * @param {number} [params.seed=-1]
+   * @param {number} [params.batch_count=1]
+   * @returns {Promise<QvacResponse>}
+   */
+  async txt2img (params) {
+    return this._runGeneration({ ...params, mode: 'txt2img' })
+  }
+
+  /**
+   * Generate an image from an input image and text.
+   * @param {object} params
+   * @param {Uint8Array} params.init_image - Input image bytes (PNG/JPEG)
+   * @param {number} [params.strength=0.75] - Denoising strength (0.0-1.0)
+   * @returns {Promise<QvacResponse>}
+   */
+  async img2img (params) {
+    if (!params.init_image) {
+      throw new Error('img2img requires init_image parameter')
+    }
+    return this._runGeneration({ ...params, mode: 'img2img' })
+  }
+
+  /**
+   * Generate a video from text (requires Wan2.x or similar video model).
+   * @param {object} params
+   * @param {string} params.prompt
+   * @param {number} [params.frames=16]
+   * @param {number} [params.fps=8]
+   * @returns {Promise<QvacResponse>}
+   */
+  async txt2vid (params) {
+    return this._runGeneration({ ...params, mode: 'txt2vid' })
+  }
+
+  async _runGeneration (params) {
+    this.logger.info('Starting generation with mode:', params.mode)
+
+    return this._withExclusiveRun(async () => {
+      await new Promise((resolve, reject) => {
+        const timer = setTimeout(() => {
+          reject(new Error(RUN_BUSY_ERROR_MESSAGE))
+        }, PREVIOUS_JOB_WAIT_MS)
+        this._lastJobResult
+          .then(() => { clearTimeout(timer); resolve() })
+          .catch(() => { clearTimeout(timer); resolve() })
+      })
+
+      const response = this._createResponse('OnlyOneJob')
+
+      let accepted
+      try {
+        accepted = await this.addon.runJob(params)
+      } catch (error) {
+        this._deleteJobMapping('OnlyOneJob')
+        response.failed(error)
+        throw error
+      }
+
+      if (!accepted) {
+        this._deleteJobMapping('OnlyOneJob')
+        const msg = RUN_BUSY_ERROR_MESSAGE
+        response.failed(new Error(msg))
+        throw new Error(msg)
+      }
+
+      this._lastJobResult = response.await()
+
+      this.logger.info('Generation job started successfully')
+
+      return response
+    })
+  }
+}
+
+module.exports = ImgStableDiffusion
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/package.json b/packages/qvac-lib-infer-stable-diffusion-cpp/package.json
new file mode 100644
index 0000000000..f2cd601cc2
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/package.json
@@ -0,0 +1,72 @@
+{
+  "name": "@qvac/img-stable-diffusion-cpp",
+  "version": "0.1.0",
+  "description": "stable-diffusion.cpp addon for qvac image/video generation",
+  "addon": true,
+  "scripts": {
+    "build": "bare-make generate && bare-make build && bare-make install",
+    "build:pack": "mkdir -p dist && npm pack --pack-destination dist",
+    "mobile:copy-prebuilds": "cp -r prebuilds/android-arm64 prebuilds/android-ia32 || echo 'Warning: Failed to copy sd prebuilds to android-ia32'; cp -r prebuilds/android-arm64 prebuilds/android-arm || echo 'Warning: Failed to copy sd prebuilds to android-arm'; cp -r prebuilds/android-arm64 prebuilds/android-x64 || echo 'Warning: Failed to copy sd prebuilds to android-x64'; cp -r prebuilds/ios-arm64 prebuilds/ios-arm64-simulator 2>/dev/null || echo 'iOS prebuilds already present'; cp -r prebuilds/ios-arm64 prebuilds/ios-x64-simulator 2>/dev/null || echo 'iOS prebuilds already present'",
+    "lint": "standard --ignore \"addon/**\"",
+    "lint:fix": "standard --ignore \"addon/**\" --fix",
+    "lint-cpp": "clang-tidy -p build $(find addon -name '*.cpp')",
+    "test": "npm run test:integration",
+    "test:integration": "bare test/integration/all.js --exit",
+    "test:dts": "tsc -p tsconfig.dts.json"
+  },
+  "files": [
+    "binding.js",
+    "index.js",
+    "addon.js",
+    "addonLogging.js",
+    "addonLogging.d.ts",
+    "prebuilds",
+    "index.d.ts",
+    "LICENSE",
+    "NOTICE"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp.git"
+  },
+  "author": "Tether",
+  "license": "Apache-2.0",
+  "bugs": "https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp/issues",
+  "homepage": "https://github.com/tetherto/qvac-lib-infer-stable-diffusion-cpp#readme",
+  "devDependencies": {
+    "@qvac/dl-filesystem": "^0.1.2",
+    "@qvac/dl-hyperdrive": "^0.1.1",
+    "bare-buffer": "^3.4.2",
+    "bare-fs": "^4.5.1",
+    "cmake-bare": "1.7.5",
+    "cmake-vcpkg": "^1.1.0",
+    "standard": "^17.0.0",
+    "typescript": "^5.9.2"
+  },
+  "dependencies": {
+    "@qvac/infer-base": "^0.2.2",
+    "bare-path": "^3.0.0",
+    "bare-process": "^4.2.2"
+  },
+  "engines": {
+    "bare": ">=1.24.0"
+  },
+  "peerDependencies": {
+    "@qvac/dl-hyperdrive": "^0.1.1"
+  },
+  "exports": {
+    "./package": "./package.json",
+    ".": {
+      "types": "./index.d.ts",
+      "default": "./index.js"
+    },
+    "./addonLogging": {
+      "types": "./addonLogging.d.ts",
+      "default": "./addonLogging.js"
+    },
+    "./addonLogging.js": "./addonLogging.js",
+    "./addon.js": "./addon.js",
+    "./binding.js": "./binding.js"
+  },
+  "types": "index.d.ts"
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/tsconfig.dts.json b/packages/qvac-lib-infer-stable-diffusion-cpp/tsconfig.dts.json
new file mode 100644
index 0000000000..434a16acea
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/tsconfig.dts.json
@@ -0,0 +1,17 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "lib": ["ES2022"],
+    "types": ["node"],
+    "skipLibCheck": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "verbatimModuleSyntax": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": true,
+    "noEmit": true
+  },
+  "include": ["index.d.ts", "addonLogging.d.ts"]
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg-configuration.json b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg-configuration.json
new file mode 100644
index 0000000000..109dd4150b
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg-configuration.json
@@ -0,0 +1,21 @@
+{
+  "overlay-ports": [
+    "vcpkg/ports"
+  ],
+  "default-registry": {
+    "kind": "git",
+    "baseline": "803c0d119ea002694963e89237c207ff6ecf47f6",
+    "repository": "git@github.com:tetherto/qvac-registry-vcpkg.git"
+  },
+  "registries": [
+    {
+      "kind": "git",
+      "baseline": "16c71a39e5a0fc0bdb3fad03beef8f38ee00ee3b",
+      "repository": "https://github.com/microsoft/vcpkg",
+      "packages": [
+        "gtest",
+        "picojson"
+      ]
+    }
+  ]
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg.json b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg.json
new file mode 100644
index 0000000000..13b5cc8fed
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg.json
@@ -0,0 +1,60 @@
+{
+  "dependencies": [
+    {
+      "name": "opencl",
+      "platform": "android"
+    },
+    "picojson",
+    {
+      "name": "qvac-fabric",
+      "version>=": "7248.1.2"
+    },
+    {
+      "name": "qvac-lib-inference-addon-cpp",
+      "version>=": "1.1.2"
+    },
+    {
+      "name": "qvac-lint-cpp",
+      "version>=": "1.4.4"
+    },
+    {
+      "name": "stable-diffusion-cpp",
+      "version>=": "0.0.1"
+    }
+  ],
+  "features": {
+    "tests": {
+      "description": "Build tests",
+      "dependencies": [
+        "gtest"
+      ]
+    },
+    "metal": {
+      "description": "Enable Metal GPU backend (macOS/iOS)",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": ["metal"]
+        }
+      ]
+    },
+    "vulkan": {
+      "description": "Enable Vulkan GPU backend",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": ["vulkan"]
+        }
+      ]
+    },
+    "cuda": {
+      "description": "Enable CUDA GPU backend",
+      "dependencies": [
+        {
+          "name": "stable-diffusion-cpp",
+          "features": ["cuda"]
+        }
+      ]
+    }
+  }
+}
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/portfile.cmake b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/portfile.cmake
new file mode 100644
index 0000000000..1428e42d32
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/portfile.cmake
@@ -0,0 +1,147 @@
+# stable-diffusion.cpp vcpkg overlay port
+#
+# Fetches stable-diffusion.cpp from GitHub (including the ggml submodule)
+# and builds it as a static library.
+#
+# The port installs:
+#   - include/stable-diffusion.h       (main C API)
+#   - include/stb_image.h              (stb image loading)
+#   - include/stb_image_write.h        (stb PNG encoding)
+#   - lib/libstable-diffusion.a        (static library)
+#   - share/stable-diffusion-cpp/      (CMake config)
+#
+# GPU backend selection is controlled via vcpkg features:
+#   - metal   -> -DGGML_METAL=ON   (macOS/iOS, auto-enabled on Apple)
+#   - vulkan  -> -DGGML_VULKAN=ON
+#   - cuda    -> -DGGML_CUDA=ON
+#   - opencl  -> -DGGML_OPENCL=ON
+#
+# NOTE: This port uses vcpkg_from_git which clones the repo so that
+# git submodule init/update works for ggml.
+# Update REF to pin a specific commit for reproducible builds.
+
+# Upstream repository and the exact commit to build.
+set(SD_CPP_REPO "https://github.com/leejet/stable-diffusion.cpp.git")
+set(SD_CPP_REF  "c5eb1e4f9a6a0dbd3cb1e8c6adff9d2c2ad78f11")  # master-505 (2026-02-19)
+
+# Fetch the pinned commit; SOURCE_PATH receives the extracted source directory.
+vcpkg_from_git(
+    OUT_SOURCE_PATH SOURCE_PATH
+    URL             "${SD_CPP_REPO}"
+    REF             "${SD_CPP_REF}"
+)
+
+# Resolve the git executable explicitly instead of relying on a GIT variable
+# that an earlier helper may or may not have left behind.
+vcpkg_find_acquire_program(GIT)
+
+# Initialise the ggml submodule (bundled inside stable-diffusion.cpp)
+# NOTE(review): confirm that SOURCE_PATH still contains git metadata after
+# vcpkg_from_git; if the helper only extracts an exported archive, this
+# command cannot populate the submodule and ggml must be fetched separately.
+vcpkg_execute_required_process(
+    COMMAND "${GIT}" submodule update --init --recursive
+    WORKING_DIRECTORY "${SOURCE_PATH}"
+    LOGNAME           "git-submodule-stable-diffusion"
+)
+
+# --- Determine GPU feature flags ---
+# Every backend starts disabled and is switched on by the matching vcpkg
+# feature of this port.
+set(SD_GGML_METAL   OFF)
+set(SD_GGML_VULKAN  OFF)
+set(SD_GGML_CUDA    OFF)
+set(SD_GGML_OPENCL  OFF)
+set(SD_FLASH_ATTN   OFF)
+
+# Metal: on via the feature flag, and auto-enabled when the *target* is an
+# Apple platform. The decision uses the vcpkg target variables rather than
+# APPLE so that cross-compiling from a macOS host (e.g. to Android) does not
+# switch Metal on by accident.
+if("metal" IN_LIST FEATURES OR VCPKG_TARGET_IS_OSX OR VCPKG_TARGET_IS_IOS)
+    set(SD_GGML_METAL ON)
+endif()
+
+if("vulkan" IN_LIST FEATURES)
+    set(SD_GGML_VULKAN ON)
+endif()
+
+if("cuda" IN_LIST FEATURES)
+    set(SD_GGML_CUDA ON)
+endif()
+
+if("opencl" IN_LIST FEATURES)
+    set(SD_GGML_OPENCL ON)
+endif()
+
+if("flash-attn" IN_LIST FEATURES)
+    set(SD_FLASH_ATTN ON)
+endif()
+
+# --- Configure and build ---
+# Static library only, upstream examples disabled; each backend switch and
+# flash attention follow the SD_* variables set earlier in this portfile.
+set(SD_CONFIGURE_OPTIONS
+    "-DBUILD_SHARED_LIBS=OFF"
+    "-DSD_BUILD_EXAMPLES=OFF"
+    "-DSD_BUILD_SHARED_LIBS=OFF"
+    "-DGGML_METAL=${SD_GGML_METAL}"
+    "-DGGML_VULKAN=${SD_GGML_VULKAN}"
+    "-DGGML_CUDA=${SD_GGML_CUDA}"
+    "-DGGML_OPENCL=${SD_GGML_OPENCL}"
+    "-DSD_FLASH_ATTN=${SD_FLASH_ATTN}"
+)
+
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}"
+    OPTIONS ${SD_CONFIGURE_OPTIONS}
+)
+
+vcpkg_cmake_install()
+
+# --- Install stb headers for PNG encode/decode in consumer code ---
+# Two source layouts are probed, in order: thirdparty/stb/ and thirdparty/.
+set(SD_STB_DIR "")
+foreach(sd_stb_candidate IN ITEMS
+        "${SOURCE_PATH}/thirdparty/stb"
+        "${SOURCE_PATH}/thirdparty")
+    if(EXISTS "${sd_stb_candidate}/stb_image.h")
+        set(SD_STB_DIR "${sd_stb_candidate}")
+        break()
+    endif()
+endforeach()
+
+# Fail here rather than producing a package that silently lacks the headers
+# this port advertises.
+if("${SD_STB_DIR}" STREQUAL "")
+    message(FATAL_ERROR
+        "stb_image.h not found under ${SOURCE_PATH}/thirdparty/stb or "
+        "${SOURCE_PATH}/thirdparty; cannot install the stb headers.")
+endif()
+
+file(INSTALL "${SD_STB_DIR}/stb_image.h"
+     DESTINATION "${CURRENT_PACKAGES_DIR}/include")
+file(INSTALL "${SD_STB_DIR}/stb_image_write.h"
+     DESTINATION "${CURRENT_PACKAGES_DIR}/include")
+
+# --- Create CMake config for find_package(stable-diffusion-cpp CONFIG REQUIRED) ---
+# Both files below are written verbatim (bracket arguments are not expanded),
+# so every ${...} inside them is evaluated later, in the consuming project.
+set(CONFIG_DIR "${CURRENT_PACKAGES_DIR}/share/stable-diffusion-cpp")
+file(MAKE_DIRECTORY "${CONFIG_DIR}")
+
+# Package config: locates the library and header relative to the config file
+# (two directories up from share/stable-diffusion-cpp) and exposes them as the
+# imported static target stable-diffusion::stable-diffusion.
+file(WRITE "${CONFIG_DIR}/stable-diffusion-cppConfig.cmake" [=[
+get_filename_component(_SD_CPP_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
+
+find_library(STABLE_DIFFUSION_LIBRARY
+    NAMES stable-diffusion
+    PATHS "${_SD_CPP_INSTALL_PREFIX}/lib"
+    NO_DEFAULT_PATH
+    REQUIRED
+)
+
+find_path(STABLE_DIFFUSION_INCLUDE_DIR
+    NAMES stable-diffusion.h
+    PATHS "${_SD_CPP_INSTALL_PREFIX}/include"
+    NO_DEFAULT_PATH
+    REQUIRED
+)
+
+if(NOT TARGET stable-diffusion::stable-diffusion)
+    add_library(stable-diffusion::stable-diffusion STATIC IMPORTED)
+    set_target_properties(stable-diffusion::stable-diffusion PROPERTIES
+        IMPORTED_LOCATION             "${STABLE_DIFFUSION_LIBRARY}"
+        INTERFACE_INCLUDE_DIRECTORIES "${STABLE_DIFFUSION_INCLUDE_DIR}"
+    )
+endif()
+]=])
+
+# Version file: reports 0.0.1, is compatible with any requested version that
+# is not greater than that, and is an exact match only for "0.0.1".
+file(WRITE "${CONFIG_DIR}/stable-diffusion-cppConfigVersion.cmake" [=[
+set(PACKAGE_VERSION "0.0.1")
+if(PACKAGE_FIND_VERSION VERSION_GREATER PACKAGE_VERSION)
+    set(PACKAGE_VERSION_COMPATIBLE FALSE)
+else()
+    set(PACKAGE_VERSION_COMPATIBLE TRUE)
+    if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
+        set(PACKAGE_VERSION_EXACT TRUE)
+    endif()
+endif()
+]=])
+
+# Remove debug include dir (no debug headers needed)
+# The debug share directory is removed as well.
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share")
+
+# Install license
+vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
+
+# NOTE(review): this assignment comes after configure/install have already
+# run, so it presumably has no effect on what was built — confirm whether a
+# release-only build was intended (it would have to be set before configure).
+set(VCPKG_BUILD_TYPE release)
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/usage b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/usage
new file mode 100644
index 0000000000..f58deef663
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/usage
@@ -0,0 +1,16 @@
+The package stable-diffusion-cpp provides CMake integration:
+
+  find_package(stable-diffusion-cpp CONFIG REQUIRED)
+  target_link_libraries(main PRIVATE stable-diffusion::stable-diffusion)
+
+The following headers are available:
+  #include <stable-diffusion.h>   // main C API
+  #include <stb_image.h>          // image loading (stb)
+  #include <stb_image_write.h>    // PNG encoding (stb)
+
+Available vcpkg features:
+  metal      - Metal GPU backend (macOS/iOS, auto-enabled on Apple)
+  vulkan     - Vulkan GPU backend
+  cuda       - CUDA GPU backend
+  opencl     - OpenCL GPU backend (Android/Adreno)
+  flash-attn - Flash Attention memory optimisation
diff --git a/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/vcpkg.json b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/vcpkg.json
new file mode 100644
index 0000000000..16ce3ace9d
--- /dev/null
+++ b/packages/qvac-lib-infer-stable-diffusion-cpp/vcpkg/ports/stable-diffusion-cpp/vcpkg.json
@@ -0,0 +1,25 @@
+{
+  "name": "stable-diffusion-cpp",
+  "version": "0.0.1",
+  "description": "Diffusion model inference in pure C/C++ (SD1.x, SD2.x, SDXL, SD3, FLUX, Wan, ...)",
+  "homepage": "https://github.com/leejet/stable-diffusion.cpp",
+  "license": "MIT",
+  "dependencies": [],
+  "features": {
+    "metal": {
+      "description": "Enable Metal GPU acceleration (macOS/iOS)"
+    },
+    "vulkan": {
+      "description": "Enable Vulkan GPU acceleration"
+    },
+    "cuda": {
+      "description": "Enable CUDA GPU acceleration"
+    },
+    "opencl": {
+      "description": "Enable OpenCL GPU acceleration (Android/Adreno)"
+    },
+    "flash-attn": {
+      "description": "Enable Flash Attention for memory efficiency"
+    }
+  }
+}