diff --git a/packages/diffusion-cpp/CHANGELOG.md b/packages/diffusion-cpp/CHANGELOG.md index 73564fd848..4b657a8a29 100644 --- a/packages/diffusion-cpp/CHANGELOG.md +++ b/packages/diffusion-cpp/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## [0.10.0] - 2026-05-25 + +### Changed + +- Consume `stable-diffusion-cpp@2026-03-01#5` from `tetherto/qvac-registry-vcpkg` after sd.cpp PR #6 and registry PR #166, which add generic GGML ESRGAN upscaler backend init with device and preference APIs. +- Remove the temporary package-local `stable-diffusion-cpp` vcpkg overlay port; the addon now resolves the port from the registry. + +### Added + +- Report `backendDevice` in ESRGAN upscaler runtime stats after load, reflecting the backend stable-diffusion.cpp selected for the upscaler context. + +### Fixed + +- On Android, ESRGAN `gpu` / `auto` config now falls back to CPU to avoid the unstable mobile GPU/OpenCL upscaler path. + ## [0.9.1] - 2026-05-25 ### Fixed diff --git a/packages/diffusion-cpp/addon.js b/packages/diffusion-cpp/addon.js index 2f0b09b344..32533d8d59 100644 --- a/packages/diffusion-cpp/addon.js +++ b/packages/diffusion-cpp/addon.js @@ -28,7 +28,15 @@ function mapAddonEvent (rawEvent, rawData, rawError) { // `ArrayBuffer.isView` is true for every TypedArray + DataView, false for // plain objects -- exactly the discrimination we want. if (rawData && typeof rawData === 'object' && !ArrayBuffer.isView(rawData)) { - return { type: 'JobEnded', data: rawData, error: null } + const data = { ...rawData } + if (typeof data.backendDevice === 'number') { + if (data.backendDevice === 0) { + data.backendDevice = 'cpu' + } else if (data.backendDevice === 1) { + data.backendDevice = 'gpu' + } + } + return { type: 'JobEnded', data, error: null } } return null diff --git a/packages/diffusion-cpp/addon/src/addon/AddonJs.hpp b/packages/diffusion-cpp/addon/src/addon/AddonJs.hpp index f3f0473624..a5cac96a92 100644 --- a/packages/diffusion-cpp/addon/src/addon/AddonJs.hpp +++ b/packages/diffusion-cpp/addon/src/addon/AddonJs.hpp @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -15,10 +14,13 @@ #include #include #include +#include #include "handlers/SdCtxHandlers.hpp" #include "model-interface/EsrganUpscalerModel.hpp" #include "model-interface/SdModel.hpp" +#include "utils/BackendLoader.hpp" +#include "utils/BackendSelection.hpp" namespace qvac_lib_inference_addon_sd { @@ -310,4 +312,34 @@ activateUpscaler(js_env_t* env, js_callback_info_t* info) try { } JSCATCH +/** + * Query expected ESRGAN RuntimeStats.backendDevice for a config.device value, + * using the same Adreno/OpenCL policy as native load. Args: [device] or + * [device, backendsDir]. + */ +inline js_value_t* +getExpectedEsrganBackendDevice(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + + const std::vector argv = js::getArguments(env, info); + if (argv.empty()) { + throw StatusError( + general_error::InvalidArgument, + "getExpectedEsrganBackendDevice: device argument is required"); + } + + const std::string device = js::String{env, argv[0]}.as(env); + std::string backendsDir; + if (argv.size() > 1 && !js::is(env, argv[1]) && + !js::is(env, argv[1])) { + backendsDir = js::String{env, argv[1]}.as(env); + } + + loadBackendModulesOnce(backendsDir); + const std::string expected = + sd_backend_selection::expectedEsrganBackendDeviceForConfig(device); + return js::String::create(env, expected); +} +JSCATCH + } // namespace qvac_lib_inference_addon_sd diff --git a/packages/diffusion-cpp/addon/src/handlers/SdCtxHandlers.hpp b/packages/diffusion-cpp/addon/src/handlers/SdCtxHandlers.hpp index f8a62bc5ea..153b6e0697 100644 --- a/packages/diffusion-cpp/addon/src/handlers/SdCtxHandlers.hpp +++ b/packages/diffusion-cpp/addon/src/handlers/SdCtxHandlers.hpp @@ -135,8 +135,8 @@ struct SdCtxConfig { bool previewNoisy = false; // also include noisy xT preview // -- ESRGAN upscaler ------------------------------------------------------- - // NOLINTNEXTLINE(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers) - int upscalerTileSize = 128; + static constexpr int K_DEFAULT_UPSCALER_TILE_SIZE = 128; + int upscalerTileSize = K_DEFAULT_UPSCALER_TILE_SIZE; bool upscalerDirect = false; bool upscalerOffloadParamsToCpu = false; int upscalerThreads = -1; diff --git a/packages/diffusion-cpp/addon/src/js-interface/binding.cpp b/packages/diffusion-cpp/addon/src/js-interface/binding.cpp index 7cf882a523..c0fda78469 100644 --- a/packages/diffusion-cpp/addon/src/js-interface/binding.cpp +++ b/packages/diffusion-cpp/addon/src/js-interface/binding.cpp @@ -29,6 +29,8 @@ js_value_t* qvacLibInferenceAddonSdExports(js_env_t* env, js_value_t* exports) { qvac_lib_inference_addon_cpp::JsInterface::destroyInstance) V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger) V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger) + V("getExpectedEsrganBackendDevice", + qvac_lib_inference_addon_sd::getExpectedEsrganBackendDevice) #undef V return exports; diff --git a/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.cpp b/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.cpp index 6ec4b1ca61..3bf0eba487 100644 --- a/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.cpp +++ b/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.cpp @@ -131,6 +131,11 @@ std::any EsrganUpscalerModel::process(const std::any& input) { lastStats_.emplace_back("width", statsWidth); lastStats_.emplace_back("height", statsHeight); lastStats_.emplace_back("repeats", static_cast(job.repeats)); + const int backendDevice = upscaler_.actualBackendDevice(); + if (backendDevice >= 0) { + lastStats_.emplace_back( + "backendDevice", static_cast(backendDevice)); + } return std::any{}; } diff --git a/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.hpp b/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.hpp index 1152119016..08c1eb0b6c 100644 --- a/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.hpp +++ b/packages/diffusion-cpp/addon/src/model-interface/EsrganUpscalerModel.hpp @@ -49,7 +49,7 @@ class EsrganUpscalerModel qvac_lib_inference_addon_sd::SdCtxConfig config_; qvac_lib_inference_addon_sd::EsrganUpscaler upscaler_; mutable std::atomic cancelRequested_{false}; - mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_{}; + mutable qvac_lib_inference_addon_cpp::RuntimeStats lastStats_; struct CumulativeStats { int64_t modelLoadMs{0}; diff --git a/packages/diffusion-cpp/addon/src/model-interface/SdModel.cpp b/packages/diffusion-cpp/addon/src/model-interface/SdModel.cpp index 3b54e144f0..421429f48f 100644 --- a/packages/diffusion-cpp/addon/src/model-interface/SdModel.cpp +++ b/packages/diffusion-cpp/addon/src/model-interface/SdModel.cpp @@ -43,8 +43,6 @@ thread_local const SdModel* g_abortModel = nullptr; std::string preferredBackendToString(enum sd_backend_preference_t pref) { switch (pref) { - case SD_BACKEND_PREF_AUTO: - return "auto"; case SD_BACKEND_PREF_CPU: return "cpu"; case SD_BACKEND_PREF_GPU: @@ -227,24 +225,8 @@ void SdModel::load() { params.enable_mmap = config_.mmap; params.offload_params_to_cpu = config_.offloadToCpu; - // Resolve the effective backend based on GPU capabilities. - // Adreno 800+ uses GPU (OpenCL), Adreno 600/700 is forced to CPU, - // everything else uses GPU (Vulkan). - auto preferredDevice = config_.device == "cpu" - ? sd_backend_selection::BackendDevice::CPU - : sd_backend_selection::BackendDevice::GPU; - auto effectiveDevice = - sd_backend_selection::resolveBackendForDevice(preferredDevice); - const bool preferOpenClForAdreno = - sd_backend_selection::shouldPreferOpenClForAdreno(preferredDevice); - - if (effectiveDevice == sd_backend_selection::BackendDevice::CPU) { - params.preferred_gpu_backend = SD_BACKEND_PREF_CPU; - } else if (preferOpenClForAdreno) { - params.preferred_gpu_backend = SD_BACKEND_PREF_OPENCL; - } else { - params.preferred_gpu_backend = SD_BACKEND_PREF_GPU; - } + params.preferred_gpu_backend = + sd_backend_selection::preferredGpuBackendForConfigDevice(config_.device); // NOLINTNEXTLINE(cppcoreguidelines-avoid-do-while) QLOG_IF( @@ -360,12 +342,13 @@ std::any SdModel::process(const std::any& input) { // Default is "txt2img" for backwards compatibility: a JSON payload that // omits "mode" keeps behaving as an image generation job. std::string mode = "txt2img"; - const auto &obj = jsonRoot.get(); - if (auto it = obj.find("mode"); it != obj.end()) { - if (!it->second.is()) - throw StatusError(general_error::InvalidArgument, - "mode must be a string"); - mode = it->second.get(); + const auto& obj = jsonRoot.get(); + if (const auto modeEntry = obj.find("mode"); modeEntry != obj.end()) { + if (!modeEntry->second.is()) { + throw StatusError( + general_error::InvalidArgument, "mode must be a string"); + } + mode = modeEntry->second.get(); } const bool isVideo = @@ -382,12 +365,12 @@ std::any SdModel::process(const std::any& input) { // process(). // --------------------------------------------------------------------------- -std::any SdModel::processImage(const GenerationJob &job, - const picojson::value &v) { +std::any +SdModel::processImage(const GenerationJob& job, const picojson::value& parsed) { // -- Build SdGenConfig from handlers --------------------------------------- qvac_lib_inference_addon_sd::SdGenConfig gen{}; - qvac_lib_inference_addon_sd::applySdGenHandlers(gen, - v.get()); + qvac_lib_inference_addon_sd::applySdGenHandlers( + gen, parsed.get()); if (gen.mode != "txt2img" && gen.mode != "img2img") { throw StatusError( @@ -578,7 +561,7 @@ std::any SdModel::processImage(const GenerationJob &job, if (!job.initImageBytes.empty()) { initPng = job.initImageBytes; } else { - const auto &jsonObj = v.get(); + const auto& jsonObj = parsed.get(); auto initBytesIt = jsonObj.find("init_image_bytes"); if (initBytesIt != jsonObj.end() && initBytesIt->second.is()) { @@ -778,8 +761,10 @@ std::any SdModel::processImage(const GenerationJob &job, // header (which would force a coordinated update across every other // package that pulls it in via vcpkg). if (wasCancelled) { - throw StatusError(std::string(general_error::GeneralAddonId), "Cancelled", - "Job cancelled"); + throw StatusError( + std::string(general_error::GeneralAddonId), + "Cancelled", + "Job cancelled"); } if (outputCount == 0) { @@ -838,17 +823,18 @@ std::any SdModel::processImage(const GenerationJob &job, // Assumes callbacks + guard are already set up by process(). // --------------------------------------------------------------------------- -std::any SdModel::processVideo(const GenerationJob &job, - const picojson::value &v) { +std::any +SdModel::processVideo(const GenerationJob& job, const picojson::value& parsed) { // -- Build SdVidGenConfig from handlers ------------------------------------ qvac_lib_inference_addon_sd::SdVidGenConfig vid{}; - qvac_lib_inference_addon_sd::applySdVidGenHandlers(vid, - v.get()); + qvac_lib_inference_addon_sd::applySdVidGenHandlers( + vid, parsed.get()); if (vid.mode != "txt2vid" && vid.mode != "img2vid" && vid.mode != "flf2vid") - throw StatusError(general_error::InvalidArgument, - "processVideo: unsupported mode '" + vid.mode + - "' (expected txt2vid, img2vid, or flf2vid)"); + throw StatusError( + general_error::InvalidArgument, + "processVideo: unsupported mode '" + vid.mode + + "' (expected txt2vid, img2vid, or flf2vid)"); // -- Mode-vs-inputs invariants -------------------------------------------- // These checks mirror the JS-layer validation but are duplicated here so @@ -861,17 +847,20 @@ std::any SdModel::processVideo(const GenerationJob &job, if (vid.mode == "flf2vid") { if (job.initImageBytes.empty()) - throw StatusError(general_error::InvalidArgument, - "flf2vid: init_image (first frame) is required"); + throw StatusError( + general_error::InvalidArgument, + "flf2vid: init_image (first frame) is required"); if (job.endImageBytes.empty()) - throw StatusError(general_error::InvalidArgument, - "flf2vid: end_image (last frame) is required"); + throw StatusError( + general_error::InvalidArgument, + "flf2vid: end_image (last frame) is required"); } if (!job.endImageBytes.empty() && vid.mode != "flf2vid") - throw StatusError(general_error::InvalidArgument, - "end_image is only valid for mode='flf2vid', got mode='" + - vid.mode + "'"); + throw StatusError( + general_error::InvalidArgument, + "end_image is only valid for mode='flf2vid', got mode='" + vid.mode + + "'"); if (vid.mode == "txt2vid" && !job.initImageBytes.empty()) throw StatusError( @@ -896,9 +885,10 @@ std::any SdModel::processVideo(const GenerationJob &job, if (!job.initImageBytes.empty()) { initImg = image_codec::decodeImage(job.initImageBytes); if (!initImg.data) - throw StatusError(general_error::InvalidArgument, - "processVideo: failed to decode init_image (corrupt or " - "unsupported format; supported: PNG, JPEG)"); + throw StatusError( + general_error::InvalidArgument, + "processVideo: failed to decode init_image (corrupt or " + "unsupported format; supported: PNG, JPEG)"); // Take ownership *before* the dimension check so a mismatch can't leak // the freshly-decoded pixel buffer (mirrors the control_frames path). initData.reset(initImg.data); @@ -910,13 +900,13 @@ std::any SdModel::processVideo(const GenerationJob &job, // single source of truth for the video's final dimensions. if (static_cast(initImg.width) != vid.width || static_cast(initImg.height) != vid.height) - throw StatusError(general_error::InvalidArgument, - "processVideo: init_image dimensions " + - std::to_string(initImg.width) + "x" + - std::to_string(initImg.height) + - " do not match video dimensions " + - std::to_string(vid.width) + "x" + - std::to_string(vid.height)); + throw StatusError( + general_error::InvalidArgument, + "processVideo: init_image dimensions " + + std::to_string(initImg.width) + "x" + + std::to_string(initImg.height) + + " do not match video dimensions " + std::to_string(vid.width) + + "x" + std::to_string(vid.height)); } if (!job.endImageBytes.empty()) { @@ -953,13 +943,13 @@ std::any SdModel::processVideo(const GenerationJob &job, PixelBuffer owned(decoded.data); if (static_cast(decoded.width) != vid.width || static_cast(decoded.height) != vid.height) { - throw StatusError(general_error::InvalidArgument, - "processVideo: control_frames[" + std::to_string(i) + - "] dimensions " + std::to_string(decoded.width) + - "x" + std::to_string(decoded.height) + - " do not match video dimensions " + - std::to_string(vid.width) + "x" + - std::to_string(vid.height)); + throw StatusError( + general_error::InvalidArgument, + "processVideo: control_frames[" + std::to_string(i) + + "] dimensions " + std::to_string(decoded.width) + "x" + + std::to_string(decoded.height) + + " do not match video dimensions " + std::to_string(vid.width) + + "x" + std::to_string(vid.height)); } controlData.push_back(std::move(owned)); controlFrames.push_back(decoded); @@ -1039,16 +1029,19 @@ std::any SdModel::processVideo(const GenerationJob &job, // would be misleading. Typed Cancelled status (see image path above for // the 3-arg ctor rationale). auto throwCancelled = []() { - throw StatusError(std::string(general_error::GeneralAddonId), "Cancelled", - "Job cancelled"); + throw StatusError( + std::string(general_error::GeneralAddonId), + "Cancelled", + "Job cancelled"); }; if (cancelRequested_.load()) { throwCancelled(); } if (frames.empty()) - throw StatusError(general_error::InternalError, - "processVideo: generate_video() returned no frames"); + throw StatusError( + general_error::InternalError, + "processVideo: generate_video() returned no frames"); // -- Fan out per-frame PNGs (opt-in) -------------------------------------- // PNG encoding for an 81-frame 832x480 video can take multiple seconds; we diff --git a/packages/diffusion-cpp/addon/src/utils/BackendSelection.cpp b/packages/diffusion-cpp/addon/src/utils/BackendSelection.cpp index dded244370..8814aea0d7 100644 --- a/packages/diffusion-cpp/addon/src/utils/BackendSelection.cpp +++ b/packages/diffusion-cpp/addon/src/utils/BackendSelection.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -13,49 +14,87 @@ using namespace qvac_errors; namespace { +constexpr std::string_view K_ADRENO_TOKEN = "adreno"; +constexpr int K_ADRENO_OPEN_CL_MIN_MODEL = 800; +constexpr int K_ADRENO_CPU_FALLBACK_MIN_MODEL = 600; + +unsigned char toLowerAscii(unsigned char character) { + return static_cast(std::tolower(character)); +} + // Extract the Adreno model number from a device description string. // Returns 0 if the device is not an Adreno GPU. // Example: "Adreno (TM) 830" -> 830, "Adreno (TM) 740" -> 740 int parseAdrenoModel(const std::string& description) { std::string lower = description; - std::transform( - lower.begin(), lower.end(), lower.begin(), [](unsigned char c) { - return std::tolower(c); - }); + std::transform(lower.begin(), lower.end(), lower.begin(), toLowerAscii); - auto pos = lower.find("adreno"); + const auto pos = lower.find(K_ADRENO_TOKEN); if (pos == std::string::npos) { return 0; } // Scan forward from "adreno" to find the first digit sequence - for (size_t i = pos + 6; i < lower.size(); ++i) { - if (std::isdigit(static_cast(lower[i]))) { - return std::stoi(lower.substr(i)); + for (size_t idx = pos + K_ADRENO_TOKEN.size(); idx < lower.size(); ++idx) { + if (std::isdigit(static_cast(lower[idx])) != 0) { + return std::stoi(lower.substr(idx)); } } return 0; } -std::string toLowerCopy(std::string s) { - std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { - return std::tolower(c); - }); - return s; +std::string toLowerCopy(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), toLowerAscii); + return str; +} + +int parseAdrenoModelFromGpuDevice(ggml_backend_dev_t dev) { + if (dev == nullptr) { + return 0; + } + const char* descPtr = ggml_backend_dev_description(dev); + const std::string desc = descPtr != nullptr ? descPtr : ""; + int model = parseAdrenoModel(desc); + if (model > 0) { + return model; + } + const char* namePtr = ggml_backend_dev_name(dev); + const std::string name = namePtr != nullptr ? namePtr : ""; + return parseAdrenoModel(name); } } // namespace namespace sd_backend_selection { +namespace { + +[[noreturn]] void throwInvalidConfigDevice(const std::string& device) { + throw StatusError( + general_error::InvalidArgument, + "device must be 'cpu' or 'gpu', got: '" + device + "'"); +} + +} // namespace + +ConfigDevice parseConfigDeviceString(const std::string& device) { + if (device == "cpu") { + return ConfigDevice::Cpu; + } + if (device == "gpu") { + return ConfigDevice::Gpu; + } + throwInvalidConfigDevice(device); +} + BackendDevice preferredDeviceFromMap( const std::unordered_map& configMap) { - auto it = configMap.find("device"); - if (it == configMap.end()) { + const auto deviceEntry = configMap.find("device"); + if (deviceEntry == configMap.end()) { return BackendDevice::GPU; // default: prefer GPU } - const std::string& device = it->second; + const std::string& device = deviceEntry->second; if (device == "gpu") { return BackendDevice::GPU; } @@ -70,12 +109,12 @@ BackendDevice preferredDeviceFromMap( int threadsFromMap( const std::unordered_map& configMap) { - auto it = configMap.find("threads"); - if (it == configMap.end()) { + const auto threadsEntry = configMap.find("threads"); + if (threadsEntry == configMap.end()) { return -1; // auto } try { - return std::stoi(it->second); + return std::stoi(threadsEntry->second); } catch (...) { return -1; } @@ -106,21 +145,22 @@ BackendDevice resolveBackendForDevice(BackendDevice preferred) { const char* name = ggml_backend_dev_name(dev); QLOG_IF( Priority::INFO, - std::string("Backend selection: GPU device '") + desc + - "' (backend: " + name + ")"); + std::string("Backend selection: GPU device '") + + (desc != nullptr ? desc : "") + + "' (backend: " + (name != nullptr ? name : "") + ")"); - int model = parseAdrenoModel(desc); + const int model = parseAdrenoModelFromGpuDevice(dev); if (model > 0) { QLOG_IF( Priority::INFO, "Backend selection: Adreno model " + std::to_string(model)); } - if (model >= 800) { + if (model >= K_ADRENO_OPEN_CL_MIN_MODEL) { QLOG_IF(Priority::INFO, "Backend selection: Adreno 800+ -> GPU (OpenCL)"); return BackendDevice::GPU; } - if (model >= 600) { + if (model >= K_ADRENO_CPU_FALLBACK_MIN_MODEL) { QLOG_IF(Priority::INFO, "Backend selection: Adreno 600/700 -> CPU"); return BackendDevice::CPU; } @@ -149,14 +189,13 @@ bool shouldPreferOpenClForAdreno(BackendDevice preferred) { continue; } - const std::string desc = ggml_backend_dev_description(dev) - ? ggml_backend_dev_description(dev) - : ""; - const std::string backendName = - ggml_backend_dev_name(dev) ? ggml_backend_dev_name(dev) : ""; + const char* descPtr = ggml_backend_dev_description(dev); + const std::string desc = descPtr != nullptr ? descPtr : ""; + const char* namePtr = ggml_backend_dev_name(dev); + const std::string backendName = namePtr != nullptr ? namePtr : ""; - const int model = parseAdrenoModel(desc); - if (model >= 800) { + const int model = parseAdrenoModelFromGpuDevice(dev); + if (model >= K_ADRENO_OPEN_CL_MIN_MODEL) { hasAdreno800Plus = true; } @@ -175,4 +214,62 @@ bool shouldPreferOpenClForAdreno(BackendDevice preferred) { return preferOpenCl; } +sd_backend_preference_t preferredGpuBackendForGpuLikeDevice() { + const BackendDevice preferred = BackendDevice::GPU; + const BackendDevice effective = resolveBackendForDevice(preferred); + if (effective == BackendDevice::CPU) { + return SD_BACKEND_PREF_CPU; + } + if (shouldPreferOpenClForAdreno(preferred)) { + return SD_BACKEND_PREF_OPENCL; + } + return SD_BACKEND_PREF_GPU; +} + +sd_backend_preference_t +preferredGpuBackendForConfigDevice(const std::string& device) { + switch (parseConfigDeviceString(device)) { + case ConfigDevice::Cpu: + return SD_BACKEND_PREF_CPU; + case ConfigDevice::Gpu: + return preferredGpuBackendForGpuLikeDevice(); + } +} + +sd_backend_preference_t +preferredEsrganBackendForConfigDevice(const std::string& device) { +#if defined(__ANDROID__) + switch (parseConfigDeviceString(device)) { + case ConfigDevice::Cpu: + return SD_BACKEND_PREF_CPU; + case ConfigDevice::Gpu: { + using Priority = qvac_lib_inference_addon_cpp::logger::Priority; + QLOG_IF( + Priority::INFO, + "Backend selection: Android ESRGAN gpu -> CPU (unstable GPU/OpenCL " + "path)"); + return SD_BACKEND_PREF_CPU; + } + } +#else + return preferredGpuBackendForConfigDevice(device); +#endif +} + +std::string expectedEsrganBackendDeviceForConfig(const std::string& device) { + switch (parseConfigDeviceString(device)) { + case ConfigDevice::Cpu: + return "cpu"; + case ConfigDevice::Gpu: +#if defined(__ANDROID__) + return "cpu"; +#else + { + const BackendDevice effective = resolveBackendForDevice(BackendDevice::GPU); + return effective == BackendDevice::CPU ? "cpu" : "gpu"; + } +#endif + } +} + } // namespace sd_backend_selection diff --git a/packages/diffusion-cpp/addon/src/utils/BackendSelection.hpp b/packages/diffusion-cpp/addon/src/utils/BackendSelection.hpp index 22d64813a9..d91db5f456 100644 --- a/packages/diffusion-cpp/addon/src/utils/BackendSelection.hpp +++ b/packages/diffusion-cpp/addon/src/utils/BackendSelection.hpp @@ -4,10 +4,21 @@ #include #include +#include + namespace sd_backend_selection { enum class BackendDevice : uint8_t { CPU, GPU }; +/** Validated config.device values shared by SD and ESRGAN upscaler paths. */ +enum class ConfigDevice : uint8_t { Cpu, Gpu }; + +/** + * Parse config.device. Accepts `cpu` or `gpu`. Throws StatusError on any other + * value. + */ +ConfigDevice parseConfigDeviceString(const std::string& device); + /** * Parse the "device" key from a config map. * Returns CPU or GPU. Throws StatusError on unknown value. @@ -43,4 +54,28 @@ BackendDevice resolveBackendForDevice(BackendDevice preferred); */ bool shouldPreferOpenClForAdreno(BackendDevice preferred); +/** + * Map config.device to stable-diffusion.cpp preferred_gpu_backend. + * Omitted device config is handled by SdCtxConfig::device defaulting to `gpu`. + */ +sd_backend_preference_t +preferredGpuBackendForConfigDevice(const std::string& device); + +/** + * ESRGAN-only stable-diffusion.cpp backend preference. + * On Android, config gpu always uses CPU (native ESRGAN GPU/OpenCL is + * unstable). Stable Diffusion uses preferredGpuBackendForConfigDevice() + * instead. + */ +sd_backend_preference_t +preferredEsrganBackendForConfigDevice(const std::string& device); + +/** + * Expected EsrganRuntimeStats.backendDevice ("cpu" or "gpu") after ESRGAN load + * when config.device is @p device. On Android, gpu always expects "cpu". + * Elsewhere mirrors resolveBackendForDevice(): Adreno 600/700 + gpu -> "cpu"; + * Adreno 800+ and other GPUs -> "gpu". + */ +std::string expectedEsrganBackendDeviceForConfig(const std::string& device); + } // namespace sd_backend_selection diff --git a/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.cpp b/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.cpp index 9742c557e3..1c52ce5ff8 100644 --- a/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.cpp +++ b/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.cpp @@ -8,6 +8,7 @@ #include #include +#include "BackendSelection.hpp" #include "LoggingMacros.hpp" using namespace qvac_errors; @@ -16,6 +17,17 @@ namespace qvac_lib_inference_addon_sd { namespace { +sd_upscaler_device_t deviceStringToSd(const std::string& deviceStr) { + using sd_backend_selection::ConfigDevice; + using sd_backend_selection::parseConfigDeviceString; + switch (parseConfigDeviceString(deviceStr)) { + case ConfigDevice::Cpu: + return SD_UPSCALER_DEVICE_CPU; + case ConfigDevice::Gpu: + return SD_UPSCALER_DEVICE_GPU; + } +} + void freeSdImageData(sd_image_t& image) noexcept { if (image.data == nullptr) { return; @@ -32,6 +44,7 @@ void freeSdImageData(sd_image_t& image) noexcept { EsrganUpscalerConfig makeUpscalerConfig(const SdCtxConfig& config) { return EsrganUpscalerConfig{ .esrganPath = config.esrganPath, + .device = config.device, .nThreads = config.nThreads, .upscalerThreads = config.upscalerThreads, .upscalerTileSize = config.upscalerTileSize, @@ -71,6 +84,14 @@ void EsrganUpscaler::load() { ensureContextLocked(); } +int EsrganUpscaler::actualBackendDevice() const { + std::lock_guard lock(mutex_); + if (ctx_ == nullptr) { + return -1; + } + return get_upscaler_backend_device(ctx_.get()); +} + int EsrganUpscaler::resolveThreads() const { if (config_.upscalerThreads == 0 || config_.upscalerThreads < -1) { throw StatusError( @@ -104,12 +125,18 @@ upscaler_ctx_t* EsrganUpscaler::ensureContextLocked() { } const int tileSize = std::max(1, config_.upscalerTileSize); - upscaler_ctx_t* raw = new_upscaler_ctx( + const sd_upscaler_device_t sdDev = deviceStringToSd(config_.device); + const sd_backend_preference_t backendPref = + sd_backend_selection::preferredEsrganBackendForConfigDevice( + config_.device); + upscaler_ctx_t* raw = new_upscaler_ctx_with_device( config_.esrganPath.c_str(), config_.upscalerOffloadParamsToCpu, config_.upscalerDirect, resolveThreads(), - tileSize); + tileSize, + sdDev, + backendPref); if (raw == nullptr) { throw StatusError( diff --git a/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.hpp b/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.hpp index 4e89dc929a..c5ae76090a 100644 --- a/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.hpp +++ b/packages/diffusion-cpp/addon/src/utils/EsrganUpscaler.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -16,6 +15,8 @@ inline constexpr int DEFAULT_UPSCALER_TILE_SIZE = 128; struct EsrganUpscalerConfig { std::string esrganPath; + /** "cpu" or "gpu" — post-init truth is exposed via actualBackendDevice(). */ + std::string device{"gpu"}; int nThreads{-1}; int upscalerThreads{-1}; int upscalerTileSize{DEFAULT_UPSCALER_TILE_SIZE}; @@ -40,6 +41,9 @@ class EsrganUpscaler { void load(); [[nodiscard]] bool isLoaded() const noexcept; + /** 0 = CPU, 1 = GPU, -1 if not loaded. Reflects actual ggml backend after + * init. */ + [[nodiscard]] int actualBackendDevice() const; sd_image_t upscaleImage( const sd_image_t& inputImage, int repeats, const std::function& shouldCancel = {}); diff --git a/packages/diffusion-cpp/index.d.ts b/packages/diffusion-cpp/index.d.ts index b1db245e68..bc7dbe5d63 100644 --- a/packages/diffusion-cpp/index.d.ts +++ b/packages/diffusion-cpp/index.d.ts @@ -73,7 +73,7 @@ export type CacheMode = 'disabled' | 'easycache' | 'ucache' | 'dbcache' | 'taylo export interface SdConfig { /** Number of CPU threads (-1 = auto) */ threads?: NumericLike - /** Preferred compute device: 'gpu' (Metal/Vulkan) or 'cpu' */ + /** Preferred compute device: 'gpu' (default; try GPU backends) or 'cpu' */ device?: 'gpu' | 'cpu' /** Weight quantization type */ type?: WeightType @@ -170,6 +170,12 @@ export interface EsrganUpscalerConfig { upscaler_offload_params_to_cpu?: boolean /** Number of CPU threads for ESRGAN upscaler (-1 = auto) */ upscaler_threads?: NumericLike + /** + * Compute device for the standalone upscaler: `gpu` by default (try GPU, + * fall back to CPU if unavailable) or `cpu`. + * `EsrganRuntimeStats.backendDevice` reports the device actually used. + */ + device?: 'cpu' | 'gpu' /** Logging verbosity: 0=error, 1=warn, 2=info, 3=debug */ verbosity?: NumericLike [key: string]: string | number | boolean | undefined @@ -360,6 +366,11 @@ export interface EsrganRuntimeStats { height: number /** Number of ESRGAN passes used by the most recent upscale job */ repeats: number + /** + * Actual compute device used by the ESRGAN upscaler after init / fallback + * (native 0/1 mapped to 'cpu' / 'gpu' in JS). + */ + backendDevice?: 'cpu' | 'gpu' } export default class ImgStableDiffusion { diff --git a/packages/diffusion-cpp/index.js b/packages/diffusion-cpp/index.js index 2a683b3b18..74b9a914ea 100644 --- a/packages/diffusion-cpp/index.js +++ b/packages/diffusion-cpp/index.js @@ -516,7 +516,8 @@ class EsrganUpscaler { * @param {object} args * @param {object} args.files - Absolute file paths for ESRGAN components * @param {string} args.files.esrgan - ESRGAN upscaler model (absolute path) - * @param {object} [args.config] - ESRGAN context configuration + * @param {object} [args.config] - ESRGAN context configuration (`device`: + * `'gpu'` by default, or `'cpu'`; `'gpu'` tries GPU then falls back to CPU). * @param {object} [args.logger] - Structured logger for JS wrapper logs. * Native C++ logs are process-global; configure them once with * `require('@qvac/diffusion-cpp/addonLogging').setLogger(...)`. diff --git a/packages/diffusion-cpp/package.json b/packages/diffusion-cpp/package.json index 98a9b17212..b45d2c073e 100644 --- a/packages/diffusion-cpp/package.json +++ b/packages/diffusion-cpp/package.json @@ -1,6 +1,6 @@ { "name": "@qvac/diffusion-cpp", - "version": "0.9.1", + "version": "0.10.0", "description": "stable-diffusion.cpp addon for qvac image/video generation", "addon": true, "scripts": { diff --git a/packages/diffusion-cpp/test/integration/esrgan-backend-device.test.js b/packages/diffusion-cpp/test/integration/esrgan-backend-device.test.js new file mode 100644 index 0000000000..f0da59916e --- /dev/null +++ b/packages/diffusion-cpp/test/integration/esrgan-backend-device.test.js @@ -0,0 +1,181 @@ +'use strict' + +const fs = require('bare-fs') +const path = require('bare-path') +const os = require('bare-os') +const proc = require('bare-process') +const b4a = require('b4a') +const test = require('brittle') +const binding = require('../../binding') +const { EsrganUpscaler } = require('../../index') +const { ensureModel, setupJsLogger } = require('./utils') + +const noGpu = proc.env && proc.env.NO_GPU === 'true' +const isAndroid = os.platform() === 'android' +const isMobile = os.platform() === 'ios' || os.platform() === 'android' + +// Device Farm / mobile: skip GPU subtest — ESRGAN GPU backend probing can hang or crash. +const skipGpuBackendDeviceSubtest = noGpu || isMobile + +const ESRGAN_MODEL = { + name: 'RealESRGAN_x4plus_anime_6B.pth', + url: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth' +} + +// Valid 16×16 RGB PNG — backendDevice assertions only; keep inputs tiny for +// slow CPU runners (e.g. linux-arm64 integration). +const TINY_PNG_16X16_B64 = + 'iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAABuElEQVR4nA3NQQEAIQgEQCMQwec+iWAEIhCBCEYgghGMYAQD7IMIRribAtNagzT2Bm0cDdboDdE4G7JxNezG03Abq+E1tiYUQReqYAhN4MIQTGEKlnALjvAKSvgErXVIZ+/QztFhnd4RnbMjO1fH7jwdt7M6Xv8HpSi6UhVDaQpXhmIqU7GUW3GUV1HKp/8wIIN9QAfHgA36QAzOgRxcA3vwDNzBGnjjH4xi6EY1DKMZ3BiGaUzDMm7DMV5DGZ/9g0Oc3aHO4TCnO8I5Helcju08jussx/N/CEqgBzUwghbwYARmMAMruAMneAMVfPEPEzLZJ3RyTNikT8TknMjJNbEnz8SdrIk3/yEpiZ7UxEhawpORmMlMrOROnORNVPLlPyzIYl/QxbFgi74Qi3MhF9fCXjwLd7EW3vqHTdnom7oxNm3DN2NjbubG2twbZ/Nu1Obb/3Agh/1AD8eBHfpBHM6DPFwH+/Ac3MM6eOcfLuWiX+rFuLQLv4yLeZkX63JfnMt7UZfv/kNBir2gxVGwoheiOAtZXIVdPIVbrMKrf3iUh/6oD+PRHvwxHuZjPqzH/XAe70M9vocPli9yEL9ki4IAAAAASUVORK5CYII=' + +function tinyPng16x16 () { + return b4a.from(TINY_PNG_16X16_B64, 'base64') +} + +const JOB_TIMEOUT_MS = isAndroid ? 300000 : 120000 +const BACKENDS_DIR = path.join(__dirname, '../../prebuilds') + +function logPhase (phase, configDevice, expected, actual) { + let line = + '[esrgan-backend-device] phase=' + + phase + + ' platform=' + + os.platform() + + ' arch=' + + os.arch() + + ' config.device=' + + configDevice + if (expected != null) { + line += ' expected backendDevice=' + expected + } + if (actual != null) { + line += ' actual=' + actual + } + console.log(line) +} + +function queryExpectedBackendDevice (configDevice) { + if (typeof binding.getExpectedEsrganBackendDevice !== 'function') { + throw new Error( + 'binding.getExpectedEsrganBackendDevice is required for backend policy tests' + ) + } + return binding.getExpectedEsrganBackendDevice(configDevice, BACKENDS_DIR) +} + +async function ensureEsrganModelPath () { + logPhase('before-model-download', 'n/a') + const [esrganName, modelDir] = await ensureModel({ + modelName: ESRGAN_MODEL.name, + downloadUrl: ESRGAN_MODEL.url + }) + const esrganPath = path.join(modelDir, esrganName) + logPhase('after-model-download', 'n/a', null, esrganPath) + return { esrganPath, modelDir } +} + +test( + 'ESRGAN standalone — config.device cpu reports backendDevice cpu in RuntimeStats', + { timeout: JOB_TIMEOUT_MS }, + async t => { + const configDevice = 'cpu' + setupJsLogger(binding) + logPhase('start', configDevice) + + logPhase('before-query-expected', configDevice) + const expected = queryExpectedBackendDevice(configDevice) + logPhase('after-query-expected', configDevice, expected) + t.is(expected, 'cpu', 'native policy always maps config cpu -> cpu') + + const { esrganPath } = await ensureEsrganModelPath() + t.ok(fs.existsSync(esrganPath), 'ESRGAN weights exist') + + const upscaler = new EsrganUpscaler({ + files: { esrgan: esrganPath }, + config: { + device: configDevice, + upscaler_tile_size: 64, + backendsDir: BACKENDS_DIR + }, + opts: { stats: true }, + logger: console + }) + + try { + logPhase('before-load', configDevice, expected) + await upscaler.load() + logPhase('after-load', configDevice, expected) + + logPhase('before-upscale', configDevice, expected) + const response = await upscaler.upscale(tinyPng16x16(), { repeats: 1 }) + await response.onUpdate(() => {}).await() + logPhase('after-upscale', configDevice, expected, response.stats.backendDevice) + + t.is( + response.stats.backendDevice, + expected, + 'native CPU path maps to stats' + ) + } finally { + await upscaler.unload().catch(() => {}) + try { + binding.releaseLogger() + } catch (_) {} + } + } +) + +test( + 'ESRGAN standalone — config.device gpu reports policy-aligned backendDevice in RuntimeStats', + { timeout: JOB_TIMEOUT_MS, skip: skipGpuBackendDeviceSubtest }, + async t => { + const configDevice = 'gpu' + setupJsLogger(binding) + logPhase('start', configDevice) + + logPhase('before-query-expected', configDevice) + const expected = queryExpectedBackendDevice(configDevice) + logPhase('after-query-expected', configDevice, expected) + t.ok( + expected === 'cpu' || expected === 'gpu', + 'native policy returns cpu or gpu for config gpu' + ) + + const { esrganPath } = await ensureEsrganModelPath() + t.ok(fs.existsSync(esrganPath), 'ESRGAN weights exist') + + const upscaler = new EsrganUpscaler({ + files: { esrgan: esrganPath }, + config: { + device: configDevice, + upscaler_tile_size: 64, + backendsDir: BACKENDS_DIR + }, + opts: { stats: true }, + logger: console + }) + + try { + logPhase('before-load', configDevice, expected) + await upscaler.load() + logPhase('after-load', configDevice, expected) + + logPhase('before-upscale', configDevice, expected) + const response = await upscaler.upscale(tinyPng16x16(), { repeats: 1 }) + await response.onUpdate(() => {}).await() + const actual = response.stats.backendDevice + logPhase('after-upscale', configDevice, expected, actual) + + t.ok( + actual === 'cpu' || actual === 'gpu', + 'config.device gpu: backendDevice may be gpu when accelerated init succeeds, ' + + 'or cpu when runtime falls back (e.g. GPU/OpenCL init failure); ' + + 'native policy hint=' + expected + ', actual=' + actual + ) + } finally { + await upscaler.unload().catch(() => {}) + try { + binding.releaseLogger() + } catch (_) {} + } + } +) diff --git a/packages/diffusion-cpp/test/mobile/integration.auto.cjs b/packages/diffusion-cpp/test/mobile/integration.auto.cjs index 7e30958605..1aa8e7fad1 100644 --- a/packages/diffusion-cpp/test/mobile/integration.auto.cjs +++ b/packages/diffusion-cpp/test/mobile/integration.auto.cjs @@ -10,6 +10,10 @@ async function runApiBehaviorTest (options = {}) { // eslint-disable-line no-unu return runIntegrationModule('../integration/api-behavior.test.js', options) } +async function runEsrganBackendDeviceTest (options = {}) { // eslint-disable-line no-unused-vars + return runIntegrationModule('../integration/esrgan-backend-device.test.js', options) +} + async function runGenerateImageEsrganUpscaleTest (options = {}) { // eslint-disable-line no-unused-vars return runIntegrationModule('../integration/generate-image-esrgan-upscale.test.js', options) } @@ -68,6 +72,7 @@ async function runModelLoadingTest (options = {}) { // eslint-disable-line no-un module.exports = { runApiBehaviorTest, + runEsrganBackendDeviceTest, runGenerateImageEsrganUpscaleTest, runGenerateImageFlux2FusionSurjectiveTest, runGenerateImageFlux2FusionTest, diff --git a/packages/diffusion-cpp/test/unit/map-addon-event.test.js b/packages/diffusion-cpp/test/unit/map-addon-event.test.js index bfe6cdc3c4..5b852c0f76 100644 --- a/packages/diffusion-cpp/test/unit/map-addon-event.test.js +++ b/packages/diffusion-cpp/test/unit/map-addon-event.test.js @@ -29,10 +29,19 @@ test('plain object data maps to JobEnded (RuntimeStats)', function (t) { const stats = { total_time_ms: 5000, steps: 20 } const result = mapAddonEvent('Stats', stats, null) t.is(result.type, 'JobEnded') - t.is(result.data, stats) + t.alike(result.data, stats) t.is(result.error, null) }) +test('JobEnded maps backendDevice 0/1 to cpu/gpu strings', function (t) { + const cpu = mapAddonEvent('Stats', { upscaleMs: 1, backendDevice: 0 }, null) + t.is(cpu.data.backendDevice, 'cpu') + const gpu = mapAddonEvent('Stats', { upscaleMs: 1, backendDevice: 1 }, null) + t.is(gpu.data.backendDevice, 'gpu') + const passthrough = mapAddonEvent('Stats', { backendDevice: 2 }, null) + t.is(passthrough.data.backendDevice, 2) +}) + test('Error event takes precedence over data shape', function (t) { const err = new Error('boom') const bytes = new Uint8Array([1, 2, 3]) diff --git a/packages/diffusion-cpp/test/unit/test_backend_selection.cpp b/packages/diffusion-cpp/test/unit/test_backend_selection.cpp index 1c5d6b6a69..65364b3750 100644 --- a/packages/diffusion-cpp/test/unit/test_backend_selection.cpp +++ b/packages/diffusion-cpp/test/unit/test_backend_selection.cpp @@ -50,3 +50,63 @@ TEST_F(SdBackendSelectionTest, ResolveBackendCpuPreferenceReturnsCPU) { TEST_F(SdBackendSelectionTest, CpuPreferenceDoesNotPreferOpenCl) { EXPECT_FALSE(shouldPreferOpenClForAdreno(BackendDevice::CPU)); } + +TEST_F(SdBackendSelectionTest, PreferredGpuBackendCpuDevice) { + EXPECT_EQ(preferredGpuBackendForConfigDevice("cpu"), SD_BACKEND_PREF_CPU); +} + +TEST_F(SdBackendSelectionTest, PreferredGpuBackendGpuDeviceIsGpuOrCpu) { + const auto pref = preferredGpuBackendForConfigDevice("gpu"); + EXPECT_TRUE( + pref == SD_BACKEND_PREF_GPU || pref == SD_BACKEND_PREF_OPENCL || + pref == SD_BACKEND_PREF_CPU); +} + +TEST_F(SdBackendSelectionTest, ExpectedEsrganBackendCpuConfig) { + EXPECT_EQ(expectedEsrganBackendDeviceForConfig("cpu"), "cpu"); +} + +TEST_F(SdBackendSelectionTest, ExpectedEsrganBackendGpuConfigIsCpuOrGpu) { + const std::string expected = expectedEsrganBackendDeviceForConfig("gpu"); + EXPECT_TRUE(expected == "cpu" || expected == "gpu"); + const auto pref = preferredEsrganBackendForConfigDevice("gpu"); + if (pref == SD_BACKEND_PREF_CPU) { + EXPECT_EQ(expected, "cpu"); + } else { + EXPECT_EQ(expected, "gpu"); + } +} + +#if defined(__ANDROID__) +TEST_F(SdBackendSelectionTest, AndroidEsrganGpuConfigForcesCpu) { + EXPECT_EQ(expectedEsrganBackendDeviceForConfig("gpu"), "cpu"); + EXPECT_EQ(preferredEsrganBackendForConfigDevice("gpu"), SD_BACKEND_PREF_CPU); + EXPECT_EQ(preferredEsrganBackendForConfigDevice("cpu"), SD_BACKEND_PREF_CPU); +} +#endif + +TEST_F(SdBackendSelectionTest, AutoDeviceThrows) { + EXPECT_THROW( + preferredGpuBackendForConfigDevice("auto"), qvac_errors::StatusError); + EXPECT_THROW( + preferredEsrganBackendForConfigDevice("auto"), qvac_errors::StatusError); + EXPECT_THROW( + expectedEsrganBackendDeviceForConfig("auto"), qvac_errors::StatusError); +} + +TEST_F(SdBackendSelectionTest, EmptyDeviceThrows) { + EXPECT_THROW( + preferredGpuBackendForConfigDevice(""), qvac_errors::StatusError); +} + +TEST_F(SdBackendSelectionTest, PreferredGpuBackendInvalidDeviceThrows) { + EXPECT_THROW( + preferredGpuBackendForConfigDevice("bogus"), qvac_errors::StatusError); + EXPECT_THROW( + preferredGpuBackendForConfigDevice("cuda"), qvac_errors::StatusError); +} + +TEST_F(SdBackendSelectionTest, ExpectedEsrganBackendInvalidDeviceThrows) { + EXPECT_THROW( + expectedEsrganBackendDeviceForConfig("bogus"), qvac_errors::StatusError); +} diff --git a/packages/diffusion-cpp/vcpkg.json b/packages/diffusion-cpp/vcpkg.json index 49a181b9a0..39e93f66fb 100644 --- a/packages/diffusion-cpp/vcpkg.json +++ b/packages/diffusion-cpp/vcpkg.json @@ -15,7 +15,7 @@ }, { "name": "stable-diffusion-cpp", - "version>=": "2026-03-01#4" + "version>=": "2026-03-01#5" }, "stb" ],