tetherto · gianni-cor · May 25, 2026 · May 13, 2026 · May 13, 2026 · May 14, 2026
@@ -1,5 +1,20 @@
 # Changelog
 
+## [0.10.0] - 2026-05-25
+
+### Changed
+
+- Consume `stable-diffusion-cpp@2026-03-01#5` from `tetherto/qvac-registry-vcpkg` after sd.cpp PR #6 and registry PR #166, which add generic GGML ESRGAN upscaler backend init with device and preference APIs.
+- Remove the temporary package-local `stable-diffusion-cpp` vcpkg overlay port; the addon now resolves the port from the registry.
+
+### Added
+
+- Report `backendDevice` (`'cpu'` or `'gpu'`) in ESRGAN upscaler runtime stats after load, reflecting the backend stable-diffusion.cpp selected for the upscaler context.
+
+### Fixed
+
+- On Android, ESRGAN `gpu` / `auto` config now falls back to CPU to avoid the unstable mobile GPU/OpenCL upscaler path.
+
 ## [0.9.1] - 2026-05-25
 
 ### Fixed

@@ -28,7 +28,15 @@ function mapAddonEvent (rawEvent, rawData, rawError) {
   // `ArrayBuffer.isView` is true for every TypedArray + DataView, false for
   // plain objects -- exactly the discrimination we want.
   if (rawData && typeof rawData === 'object' && !ArrayBuffer.isView(rawData)) {
-    return { type: 'JobEnded', data: rawData, error: null }
+    const data = { ...rawData }
+    if (typeof data.backendDevice === 'number') {
+      if (data.backendDevice === 0) {
+        data.backendDevice = 'cpu'
+      } else if (data.backendDevice === 1) {
+        data.backendDevice = 'gpu'
+      }
+    }
+    return { type: 'JobEnded', data, error: null }
   }
 
   return null

@@ -7,18 +7,20 @@
 #include <utility>
 #include <vector>
 
-#include <picojson/picojson.h>
 #include <inference-addon-cpp/JsInterface.hpp>
 #include <inference-addon-cpp/JsUtils.hpp>
 #include <inference-addon-cpp/ModelInterfaces.hpp>
 #include <inference-addon-cpp/addon/AddonJs.hpp>
 #include <inference-addon-cpp/handlers/JsOutputHandlerImplementations.hpp>
 #include <inference-addon-cpp/handlers/OutputHandler.hpp>
 #include <inference-addon-cpp/queue/OutputCallbackJs.hpp>
+#include <picojson/picojson.h>
 
 #include "handlers/SdCtxHandlers.hpp"
 #include "model-interface/EsrganUpscalerModel.hpp"
 #include "model-interface/SdModel.hpp"
+#include "utils/BackendLoader.hpp"
+#include "utils/BackendSelection.hpp"
 
 namespace qvac_lib_inference_addon_sd {
 
@@ -310,4 +312,34 @@ activateUpscaler(js_env_t* env, js_callback_info_t* info) try {
 }
 JSCATCH
 
+/**
+ * JS binding: returns, as a JS string, the ESRGAN RuntimeStats.backendDevice
+ * expected for a config.device value, using the same Adreno/OpenCL policy as
+ * native load. Args: [device] or [device, backendsDir]; device is required.
+ */
+inline js_value_t*
+getExpectedEsrganBackendDevice(js_env_t* env, js_callback_info_t* info) try {
+  using namespace qvac_lib_inference_addon_cpp;
+
+  const std::vector<js_value_t*> argv = js::getArguments(env, info);
+  if (argv.empty()) {
+    throw StatusError(
+        general_error::InvalidArgument,
+        "getExpectedEsrganBackendDevice: device argument is required");
+  }
+
+  const std::string device = js::String{env, argv[0]}.as<std::string>(env);
+  std::string backendsDir; // stays empty unless argv[1] is present and neither undefined nor null
+  if (argv.size() > 1 && !js::is<js::Undefined>(env, argv[1]) &&
+      !js::is<js::Null>(env, argv[1])) {
+    backendsDir = js::String{env, argv[1]}.as<std::string>(env);
+  }
+
+  loadBackendModulesOnce(backendsDir); // NOTE(review): presumably a one-time backend module load needed before the query -- confirm
+  const std::string expected =
+      sd_backend_selection::expectedEsrganBackendDeviceForConfig(device);
+  return js::String::create(env, expected);
+}
+JSCATCH // NOTE(review): presumably expands to the catch handlers of the function-try-block above -- confirm
+
 } // namespace qvac_lib_inference_addon_sd
@@ -135,8 +135,8 @@ struct SdCtxConfig {
   bool previewNoisy = false;   // also include noisy xT preview
 
   // -- ESRGAN upscaler -------------------------------------------------------
-  // NOLINTNEXTLINE(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers)
-  int upscalerTileSize = 128;
+  static constexpr int K_DEFAULT_UPSCALER_TILE_SIZE = 128;
+  int upscalerTileSize = K_DEFAULT_UPSCALER_TILE_SIZE;
   bool upscalerDirect = false;
   bool upscalerOffloadParamsToCpu = false;
   int upscalerThreads = -1;

@@ -29,6 +29,8 @@ js_value_t* qvacLibInferenceAddonSdExports(js_env_t* env, js_value_t* exports) {
     qvac_lib_inference_addon_cpp::JsInterface::destroyInstance)
   V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger)
   V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger)
+  V("getExpectedEsrganBackendDevice",
+    qvac_lib_inference_addon_sd::getExpectedEsrganBackendDevice)
 
 #undef V
   return exports;

@@ -131,6 +131,11 @@ std::any EsrganUpscalerModel::process(const std::any& input) {
   lastStats_.emplace_back("width", statsWidth);
   lastStats_.emplace_back("height", statsHeight);
   lastStats_.emplace_back("repeats", static_cast<int64_t>(job.repeats));
+  const int backendDevice = upscaler_.actualBackendDevice();
+  if (backendDevice >= 0) {
+    lastStats_.emplace_back(
+        "backendDevice", static_cast<int64_t>(backendDevice));
+  }
 
   return std::any{};
 }

@@ -49,7 +49,7 @@ class EsrganUpscalerModel
   qvac_lib_inference_addon_sd::SdCtxConfig config_;
   qvac_lib_inference_addon_sd::EsrganUpscaler upscaler_;
   mutable std::atomic<bool> cancelRequested_{false};
-  mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_{};
+  mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_;
 
   struct CumulativeStats {
     int64_t modelLoadMs{0};

@@ -43,8 +43,6 @@ thread_local const SdModel* g_abortModel = nullptr;
 
 std::string preferredBackendToString(enum sd_backend_preference_t pref) {
   switch (pref) {
-  case SD_BACKEND_PREF_AUTO:
-    return "auto";
   case SD_BACKEND_PREF_CPU:
     return "cpu";
   case SD_BACKEND_PREF_GPU:
@@ -227,24 +225,8 @@ void SdModel::load() {
   params.enable_mmap = config_.mmap;
   params.offload_params_to_cpu = config_.offloadToCpu;
 
-  // Resolve the effective backend based on GPU capabilities.
-  // Adreno 800+ uses GPU (OpenCL), Adreno 600/700 is forced to CPU,
-  // everything else uses GPU (Vulkan).
-  auto preferredDevice = config_.device == "cpu"
-                             ? sd_backend_selection::BackendDevice::CPU
-                             : sd_backend_selection::BackendDevice::GPU;
-  auto effectiveDevice =
-      sd_backend_selection::resolveBackendForDevice(preferredDevice);
-  const bool preferOpenClForAdreno =
-      sd_backend_selection::shouldPreferOpenClForAdreno(preferredDevice);
-
-  if (effectiveDevice == sd_backend_selection::BackendDevice::CPU) {
-    params.preferred_gpu_backend = SD_BACKEND_PREF_CPU;
-  } else if (preferOpenClForAdreno) {
-    params.preferred_gpu_backend = SD_BACKEND_PREF_OPENCL;
-  } else {
-    params.preferred_gpu_backend = SD_BACKEND_PREF_GPU;
-  }
+  params.preferred_gpu_backend =
+      sd_backend_selection::preferredGpuBackendForConfigDevice(config_.device);
 
   // NOLINTNEXTLINE(cppcoreguidelines-avoid-do-while)
   QLOG_IF(
@@ -360,12 +342,13 @@ std::any SdModel::process(const std::any& input) {
   // Default is "txt2img" for backwards compatibility: a JSON payload that
   // omits "mode" keeps behaving as an image generation job.
   std::string mode = "txt2img";
-  const auto &obj = jsonRoot.get<picojson::object>();
-  if (auto it = obj.find("mode"); it != obj.end()) {
-    if (!it->second.is<std::string>())
-      throw StatusError(general_error::InvalidArgument,
-                        "mode must be a string");
-    mode = it->second.get<std::string>();
+  const auto& obj = jsonRoot.get<picojson::object>();
+  if (const auto modeEntry = obj.find("mode"); modeEntry != obj.end()) {
+    if (!modeEntry->second.is<std::string>()) {
+      throw StatusError(
+          general_error::InvalidArgument, "mode must be a string");
+    }
+    mode = modeEntry->second.get<std::string>();
   }
 
   const bool isVideo =
@@ -382,12 +365,12 @@ std::any SdModel::process(const std::any& input) {
 // process().
 // ---------------------------------------------------------------------------
 
-std::any SdModel::processImage(const GenerationJob &job,
-                               const picojson::value &v) {
+std::any
+SdModel::processImage(const GenerationJob& job, const picojson::value& parsed) {
   // -- Build SdGenConfig from handlers ---------------------------------------
   qvac_lib_inference_addon_sd::SdGenConfig gen{};
-  qvac_lib_inference_addon_sd::applySdGenHandlers(gen,
-                                                  v.get<picojson::object>());
+  qvac_lib_inference_addon_sd::applySdGenHandlers(
+      gen, parsed.get<picojson::object>());
 
   if (gen.mode != "txt2img" && gen.mode != "img2img") {
     throw StatusError(
@@ -578,7 +561,7 @@ std::any SdModel::processImage(const GenerationJob &job,
       if (!job.initImageBytes.empty()) {
         initPng = job.initImageBytes;
       } else {
-        const auto &jsonObj = v.get<picojson::object>();
+        const auto& jsonObj = parsed.get<picojson::object>();
         auto initBytesIt = jsonObj.find("init_image_bytes");
         if (initBytesIt != jsonObj.end() &&
             initBytesIt->second.is<picojson::array>()) {
@@ -778,8 +761,10 @@ std::any SdModel::processImage(const GenerationJob &job,
   // header (which would force a coordinated update across every other
   // package that pulls it in via vcpkg).
   if (wasCancelled) {
-    throw StatusError(std::string(general_error::GeneralAddonId), "Cancelled",
-                      "Job cancelled");
+    throw StatusError(
+        std::string(general_error::GeneralAddonId),
+        "Cancelled",
+        "Job cancelled");
   }
 
   if (outputCount == 0) {
@@ -838,17 +823,18 @@ std::any SdModel::processImage(const GenerationJob &job,
 // Assumes callbacks + guard are already set up by process().
 // ---------------------------------------------------------------------------
 
-std::any SdModel::processVideo(const GenerationJob &job,
-                               const picojson::value &v) {
+std::any
+SdModel::processVideo(const GenerationJob& job, const picojson::value& parsed) {
   // -- Build SdVidGenConfig from handlers ------------------------------------
   qvac_lib_inference_addon_sd::SdVidGenConfig vid{};
-  qvac_lib_inference_addon_sd::applySdVidGenHandlers(vid,
-                                                     v.get<picojson::object>());
+  qvac_lib_inference_addon_sd::applySdVidGenHandlers(
+      vid, parsed.get<picojson::object>());
 
   if (vid.mode != "txt2vid" && vid.mode != "img2vid" && vid.mode != "flf2vid")
-    throw StatusError(general_error::InvalidArgument,
-                      "processVideo: unsupported mode '" + vid.mode +
-                          "' (expected txt2vid, img2vid, or flf2vid)");
+    throw StatusError(
+        general_error::InvalidArgument,
+        "processVideo: unsupported mode '" + vid.mode +
+            "' (expected txt2vid, img2vid, or flf2vid)");
 
   // -- Mode-vs-inputs invariants --------------------------------------------
   // These checks mirror the JS-layer validation but are duplicated here so
@@ -861,17 +847,20 @@ std::any SdModel::processVideo(const GenerationJob &job,
 
   if (vid.mode == "flf2vid") {
     if (job.initImageBytes.empty())
-      throw StatusError(general_error::InvalidArgument,
-                        "flf2vid: init_image (first frame) is required");
+      throw StatusError(
+          general_error::InvalidArgument,
+          "flf2vid: init_image (first frame) is required");
     if (job.endImageBytes.empty())
-      throw StatusError(general_error::InvalidArgument,
-                        "flf2vid: end_image (last frame) is required");
+      throw StatusError(
+          general_error::InvalidArgument,
+          "flf2vid: end_image (last frame) is required");
   }
 
   if (!job.endImageBytes.empty() && vid.mode != "flf2vid")
-    throw StatusError(general_error::InvalidArgument,
-                      "end_image is only valid for mode='flf2vid', got mode='" +
-                          vid.mode + "'");
+    throw StatusError(
+        general_error::InvalidArgument,
+        "end_image is only valid for mode='flf2vid', got mode='" + vid.mode +
+            "'");
 
   if (vid.mode == "txt2vid" && !job.initImageBytes.empty())
     throw StatusError(
@@ -896,9 +885,10 @@ std::any SdModel::processVideo(const GenerationJob &job,
   if (!job.initImageBytes.empty()) {
     initImg = image_codec::decodeImage(job.initImageBytes);
     if (!initImg.data)
-      throw StatusError(general_error::InvalidArgument,
-                        "processVideo: failed to decode init_image (corrupt or "
-                        "unsupported format; supported: PNG, JPEG)");
+      throw StatusError(
+          general_error::InvalidArgument,
+          "processVideo: failed to decode init_image (corrupt or "
+          "unsupported format; supported: PNG, JPEG)");
     // Take ownership *before* the dimension check so a mismatch can't leak
     // the freshly-decoded pixel buffer (mirrors the control_frames path).
     initData.reset(initImg.data);
@@ -910,13 +900,13 @@ std::any SdModel::processVideo(const GenerationJob &job,
     // single source of truth for the video's final dimensions.
     if (static_cast<int>(initImg.width) != vid.width ||
         static_cast<int>(initImg.height) != vid.height)
-      throw StatusError(general_error::InvalidArgument,
-                        "processVideo: init_image dimensions " +
-                            std::to_string(initImg.width) + "x" +
-                            std::to_string(initImg.height) +
-                            " do not match video dimensions " +
-                            std::to_string(vid.width) + "x" +
-                            std::to_string(vid.height));
+      throw StatusError(
+          general_error::InvalidArgument,
+          "processVideo: init_image dimensions " +
+              std::to_string(initImg.width) + "x" +
+              std::to_string(initImg.height) +
+              " do not match video dimensions " + std::to_string(vid.width) +
+              "x" + std::to_string(vid.height));
   }
 
   if (!job.endImageBytes.empty()) {
@@ -953,13 +943,13 @@ std::any SdModel::processVideo(const GenerationJob &job,
       PixelBuffer owned(decoded.data);
       if (static_cast<int>(decoded.width) != vid.width ||
           static_cast<int>(decoded.height) != vid.height) {
-        throw StatusError(general_error::InvalidArgument,
-                          "processVideo: control_frames[" + std::to_string(i) +
-                              "] dimensions " + std::to_string(decoded.width) +
-                              "x" + std::to_string(decoded.height) +
-                              " do not match video dimensions " +
-                              std::to_string(vid.width) + "x" +
-                              std::to_string(vid.height));
+        throw StatusError(
+            general_error::InvalidArgument,
+            "processVideo: control_frames[" + std::to_string(i) +
+                "] dimensions " + std::to_string(decoded.width) + "x" +
+                std::to_string(decoded.height) +
+                " do not match video dimensions " + std::to_string(vid.width) +
+                "x" + std::to_string(vid.height));
       }
       controlData.push_back(std::move(owned));
       controlFrames.push_back(decoded);
@@ -1039,16 +1029,19 @@ std::any SdModel::processVideo(const GenerationJob &job,
   // would be misleading. Typed Cancelled status (see image path above for
   // the 3-arg ctor rationale).
   auto throwCancelled = []() {
-    throw StatusError(std::string(general_error::GeneralAddonId), "Cancelled",
-                      "Job cancelled");
+    throw StatusError(
+        std::string(general_error::GeneralAddonId),
+        "Cancelled",
+        "Job cancelled");
   };
   if (cancelRequested_.load()) {
     throwCancelled();
   }
 
   if (frames.empty())
-    throw StatusError(general_error::InternalError,
-                      "processVideo: generate_video() returned no frames");
+    throw StatusError(
+        general_error::InternalError,
+        "processVideo: generate_video() returned no frames");
 
   // -- Fan out per-frame PNGs (opt-in) --------------------------------------
   // PNG encoding for an 81-frame 832x480 video can take multiple seconds; we