Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion packages/lib-infer-diffusion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,25 @@ bare_module_target("." unused_target NAME module_name VERSION unused_version)
set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}")
message("Building qvac-lib-inference-addon-sd with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'")

add_bare_module(qvac-lib-inference-addon-sd)
# Android + GGML_BACKEND_DL: every non-CPU ggml backend is shipped as a
# separate .so that ggml dlopen()s at runtime, so each one is wrapped in an
# imported target and queued for installation alongside the addon.
# Backends whose name starts with "ggml-cpu" are left out because the CPU
# backend is statically linked (GGML_CPU_STATIC).
#
# BACKEND_DL_EXPORTS is always defined; it stays empty on other platforms or
# when GGML_BACKEND_DL is off.
set(BACKEND_DL_EXPORTS "")
if(ANDROID AND GGML_BACKEND_DL)
  foreach(_backend ${GGML_AVAILABLE_BACKENDS})
    if(NOT "${_backend}" MATCHES "^ggml-cpu")
      # Configuration fails here if the backend library cannot be located.
      find_library(_${_backend}_LIB "${_backend}" REQUIRED)

      add_library(ggml::${_backend} SHARED IMPORTED)
      set_property(TARGET ggml::${_backend}
        PROPERTY IMPORTED_LOCATION "${_${_backend}_LIB}")
      set_property(TARGET ggml::${_backend}
        PROPERTY IMPORTED_NO_SONAME TRUE)

      # Each backend contributes one "INSTALL TARGET <target>" triple.
      list(APPEND BACKEND_DL_EXPORTS INSTALL TARGET ggml::${_backend})
    endif()
  endforeach()
endif()

add_bare_module(qvac-lib-inference-addon-sd EXPORTS ${BACKEND_DL_EXPORTS})
set(ADDON_SOURCES
${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp
${PROJECT_SOURCE_DIR}/addon/src/handlers/SdCtxHandlers.cpp
Expand Down Expand Up @@ -97,6 +115,10 @@ target_compile_features(${qvac-lib-inference-addon-sd} PRIVATE cxx_std_20)
target_compile_definitions(${qvac-lib-inference-addon-sd} PUBLIC JS_LOGGER)
target_compile_definitions(${qvac-lib-inference-addon-sd} PRIVATE BACKENDS_SUBDIR="${BACKENDS_SUBDIR_VALUE}")

# Define GGML_BACKEND_DL for the addon sources only when the CMake variable
# GGML_BACKEND_DL is truthy; otherwise the expression expands to nothing.
target_compile_definitions(${qvac-lib-inference-addon-sd} PRIVATE
  $<$<BOOL:${GGML_BACKEND_DL}>:GGML_BACKEND_DL>)

if(BUILD_TESTING)
find_package(GTest CONFIG REQUIRED)
include(GoogleTest)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ const SdCtxHandlersMap SD_CTX_HANDLERS = {
c.forceSDXLVaeConvScale = parseBool(v, "force_sdxl_vae_conv_scale");
}},

// ── Backend loading
// ────────────────────────────────────────────────────────────

{"backendsDir",
[](SdCtxConfig& c, const std::string& v) { c.backendsDir = v; }},

// ── Logging
// ────────────────────────────────────────────────────────────────

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ struct SdCtxConfig {
// ── SDXL compatibility ────────────────────────────────────────────────────
bool forceSDXLVaeConvScale = false; // force SDXL VAE conv scale (compat fix)

// ── Backend loading ────────────────────────────────────────────────────────
std::string backendsDir; // directory containing DL backend .so modules

// ── Internal ──────────────────────────────────────────────────────────────
// Upstream defaults to true, which frees model weight buffers after each
// generate_image_internal() call. The addon reuses a single sd_ctx across
Expand Down
41 changes: 36 additions & 5 deletions packages/lib-infer-diffusion/addon/src/model-interface/SdModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <chrono>
#include <cstring>
#include <filesystem>
#include <sstream>

#define STB_IMAGE_IMPLEMENTATION
Expand All @@ -12,6 +13,8 @@
#include <qvac-lib-inference-addon-cpp/Logger.hpp>
#include <stb_image_write.h>

#include <ggml-backend.h>

#include "utils/BackendSelection.hpp"
#include "utils/LoggingMacros.hpp"

Expand Down Expand Up @@ -115,6 +118,32 @@ void SdModel::load() {
params.flash_attn = config_.flashAttn;
params.diffusion_flash_attn = config_.diffusionFlashAttn;

// Load DL GPU backend modules before probing devices / creating the SD
// context. In GGML_BACKEND_DL mode, device enumeration is empty until these
// backend modules are loaded.
#ifdef GGML_BACKEND_DL
{
static bool backendsLoaded = false;
if (!backendsLoaded) {
using Priority = qvac_lib_inference_addon_cpp::logger::Priority;
if (!config_.backendsDir.empty()) {
std::filesystem::path backendsDirPath(config_.backendsDir);
#ifdef BACKENDS_SUBDIR
backendsDirPath = backendsDirPath / BACKENDS_SUBDIR;
backendsDirPath = backendsDirPath.lexically_normal();
#endif
QLOG_IF(Priority::INFO,
"Loading GPU backends from: " + backendsDirPath.string());
ggml_backend_load_all_from_path(backendsDirPath.string().c_str());
} else {
QLOG_IF(Priority::INFO, "Loading GPU backends from default path");
ggml_backend_load_all();
}
backendsLoaded = true;
}
}
#endif

// ── Memory management ─────────────────────────────────────────────────────
params.enable_mmap = config_.mmap;
params.offload_params_to_cpu = config_.offloadToCpu;
Expand All @@ -127,13 +156,15 @@ void SdModel::load() {
: sd_backend_selection::BackendDevice::GPU;
auto effectiveDevice =
sd_backend_selection::resolveBackendForDevice(preferredDevice);
const bool preferOpenClForAdreno =
sd_backend_selection::shouldPreferOpenClForAdreno(preferredDevice);

if (effectiveDevice == sd_backend_selection::BackendDevice::CPU) {
#ifdef _WIN32
_putenv_s("SD_CPU_ONLY", "1");
#else
setenv("SD_CPU_ONLY", "1", 1);
#endif
params.preferred_gpu_backend = SD_BACKEND_PREF_CPU;
} else if (preferOpenClForAdreno) {
params.preferred_gpu_backend = SD_BACKEND_PREF_OPENCL;
} else {
params.preferred_gpu_backend = SD_BACKEND_PREF_GPU;
}

#if defined(__APPLE__)
Expand Down
51 changes: 51 additions & 0 deletions packages/lib-infer-diffusion/addon/src/utils/BackendSelection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ int parseAdrenoModel(const std::string& description) {
return 0;
}

// Returns a copy of `s` in which every character has been passed through
// std::tolower. The argument is taken by value, so the caller's string is
// left untouched.
std::string toLowerCopy(std::string s) {
  for (char& ch : s) {
    // Go through unsigned char first: std::tolower is undefined for negative
    // values other than EOF.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}

} // namespace

namespace sd_backend_selection {
Expand Down Expand Up @@ -118,4 +125,48 @@ BackendDevice resolveBackendForDevice(BackendDevice preferred) {
return BackendDevice::GPU;
}

// Decides whether the OpenCL backend should be preferred.
//
// Walks every device currently registered with ggml and looks, among the
// GPU / integrated-GPU devices only, for two independent facts:
//   * some device description parses to an Adreno model number >= 800, and
//   * some device name contains "opencl" (case-insensitive).
// Returns true only when both were observed; in that case an INFO line is
// logged. A CPU preference short-circuits to false without probing devices.
bool shouldPreferOpenClForAdreno(BackendDevice preferred) {
  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;

  if (preferred == BackendDevice::CPU) {
    return false;
  }

  bool sawAdreno800Plus = false;
  bool sawOpenClBackend = false;

  const size_t deviceCount = ggml_backend_dev_count();
  for (size_t idx = 0; idx < deviceCount; ++idx) {
    ggml_backend_dev_t device = ggml_backend_dev_get(idx);

    // The `enum` keyword is required: a function of the same name exists.
    const enum ggml_backend_dev_type kind = ggml_backend_dev_type(device);
    const bool isGpuLike = kind == GGML_BACKEND_DEVICE_TYPE_GPU ||
                           kind == GGML_BACKEND_DEVICE_TYPE_IGPU;
    if (!isGpuLike) {
      continue;
    }

    // Either getter may hand back a null pointer; treat that as empty text.
    const char* rawDescription = ggml_backend_dev_description(device);
    const char* rawName = ggml_backend_dev_name(device);
    const std::string description = rawDescription ? rawDescription : "";
    const std::string name = rawName ? rawName : "";

    sawAdreno800Plus |= parseAdrenoModel(description) >= 800;
    sawOpenClBackend |=
        toLowerCopy(name).find("opencl") != std::string::npos;
  }

  if (!(sawAdreno800Plus && sawOpenClBackend)) {
    return false;
  }

  QLOG_IF(
      Priority::INFO,
      "Backend selection: Adreno 800+ with OpenCL backend available -> "
      "prefer OpenCL");
  return true;
}

} // namespace sd_backend_selection
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,12 @@ int threadsFromMap(
*/
BackendDevice resolveBackendForDevice(BackendDevice preferred);

/**
 * Reports whether the OpenCL backend should be preferred, based on the ggml
 * devices that are registered at the time of the call.
 *
 * Returns true only when `preferred` is not CPU and, among the GPU / iGPU
 * devices enumerated, an Adreno 800+ description was found and at least one
 * device name contains "opencl". A CPU preference always returns false.
 */
bool shouldPreferOpenClForAdreno(BackendDevice preferred);

} // namespace sd_backend_selection
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b0b8e578..576d6a04 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,6 +177,7 @@ option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})

option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
+option(GGML_CPU_STATIC "ggml: build CPU backend as static library even with GGML_BACKEND_DL" OFF)
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d577f809..ef3a1308 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -187,6 +187,10 @@ endif()

# GGML_BACKEND_DL works with static core when PIC is enabled below.

+if (GGML_CPU_STATIC AND GGML_CPU_ALL_VARIANTS)
+ message(FATAL_ERROR "GGML_CPU_STATIC is incompatible with GGML_CPU_ALL_VARIANTS")
+endif()
+
add_library(ggml-base
../include/ggml.h
../include/ggml-alloc.h
@@ -243,7 +247,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
endif()

function(ggml_add_backend_library backend)
- if (GGML_BACKEND_DL)
+ if (GGML_BACKEND_DL AND NOT (GGML_CPU_STATIC AND ${backend} MATCHES "^ggml-cpu"))
add_library(${backend} MODULE ${ARGN})
# write the shared library to the output directory
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
@@ -299,6 +303,9 @@ function(ggml_add_backend backend)
string(TOUPPER "GGML_USE_${backend}" backend_use)
target_compile_definitions(ggml PUBLIC ${backend_use})
endif()
+ if (GGML_CPU_STATIC AND "${backend}" STREQUAL "CPU")
+ target_compile_definitions(ggml PUBLIC GGML_USE_CPU)
+ endif()
endif()
endfunction()

diff --git a/cmake/ggml-config.cmake.in b/cmake/ggml-config.cmake.in
index 91c9d5cd..0c7a92c8 100644
--- a/cmake/ggml-config.cmake.in
+++ b/cmake/ggml-config.cmake.in
@@ -128,6 +128,12 @@ if(NOT TARGET ggml::ggml)
set(_ggml_all_targets "")
- if (NOT GGML_BACKEND_DL)
+ # In hybrid mode (GGML_BACKEND_DL + GGML_CPU_STATIC), only the CPU backend
+ # is static and must still be exported to downstream consumers.
+ if (NOT GGML_BACKEND_DL OR GGML_CPU_STATIC)
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
+ string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
+ if (GGML_BACKEND_DL AND GGML_CPU_STATIC AND NOT is_cpu_variant)
+ continue()
+ endif()
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)

20 changes: 16 additions & 4 deletions packages/lib-infer-diffusion/vcpkg/ports/ggml/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ vcpkg_from_github(
ggml-opencl-graceful-no-devices.patch
ggml-config-include-dir.patch
ggml-static-core-dl-backends.patch
ggml-cpu-static-hybrid.patch
)

# --- GPU feature flags ---
Expand Down Expand Up @@ -93,12 +94,14 @@ endif()
# --- Platform options ---
set(PLATFORM_OPTIONS)

# stable-diffusion.cpp calls ggml_backend_cpu_init() and ggml_backend_is_cpu()
# directly, so the CPU backend must be statically linked. GGML_BACKEND_DL is
# therefore OFF — all backends (CPU, Vulkan, OpenCL) are static libraries
# linked into the consumer binary.
# Hybrid backend mode for Android: GPU backends (Vulkan, OpenCL) are MODULE
# .so files loaded at runtime via dlopen — no libOpenCL.so NEEDED dependency.
# The CPU backend is statically linked (GGML_CPU_STATIC) so that SD can call
# ggml_set_f32, ggml_backend_cpu_init, etc. directly at link time.
if(VCPKG_TARGET_IS_ANDROID)
list(APPEND PLATFORM_OPTIONS
-DGGML_BACKEND_DL=ON
-DGGML_CPU_STATIC=ON
-DGGML_VULKAN_DISABLE_COOPMAT=ON
-DGGML_VULKAN_DISABLE_COOPMAT2=ON
)
Expand Down Expand Up @@ -126,6 +129,15 @@ vcpkg_cmake_configure(

vcpkg_cmake_install()

# Android only: copy the dynamically loaded ggml backend modules into the
# package's lib directory. ggml builds each backend as a MODULE target but
# does NOT install them via cmake install(), so they are picked up directly
# from the release build tree's bin directory.
if(VCPKG_TARGET_IS_ANDROID)
    set(_backend_so_dir "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin")
    file(GLOB _backend_sos "${_backend_so_dir}/libggml-*.so")
    # Nothing is installed when the glob matched no files.
    if(NOT "${_backend_sos}" STREQUAL "")
        file(INSTALL ${_backend_sos}
            DESTINATION "${CURRENT_PACKAGES_DIR}/lib")
    endif()
endif()

# Fix up the CMake package config installed by ggml's own build system.
vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml)

Expand Down
6 changes: 5 additions & 1 deletion packages/lib-infer-diffusion/vcpkg/ports/ggml/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@
"name": "metal",
"platform": "osx | ios"
},
{
"name": "opencl",
"platform": "android"
},
{
"name": "vulkan",
"platform": "windows | linux"
"platform": "windows | linux | android"
}
],
"features": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
# lib/libstable-diffusion.a (static library)
# share/stable-diffusion-cpp/ (CMake package config)
#
# GPU backend selection is controlled via vcpkg features which forward to
# the ggml dependency. The SD_* options below set compile-time defines
# (e.g. -DSD_USE_CUDA) that the stable-diffusion.cpp source requires.
# GPU backend selection is handled at runtime via ggml's backend registry.
# The sd-generic-backend-init patch replaces SD's backend-specific init
# with ggml_backend_init_by_type() which works with both statically linked
# and dynamically loaded backends.

# Pinned to release tag master-514-5792c66 (2026-03-01).
vcpkg_from_github(
Expand All @@ -20,39 +21,13 @@ vcpkg_from_github(
SHA512 9bdf945d27ea24d9ea8218a7b875b6d1346711122723453840f4648cd862de3be28e37736ce0ef46ed304cbe810593dfa4264eec969c9e0c8dafb854298280f7
HEAD_REF master
PATCHES
sd-cpu-only.patch
sd-backend-priority.patch
sd-generic-backend-init.patch
abort-callback.patch
fix-failure-path-cleanup.patch
)

# --- GPU feature flags ---
# These set SD_* cache variables which the upstream CMakeLists.txt translates
# into -DSD_USE_<backend> compile definitions. The actual ggml backend
# libraries are already built and installed by the ggml port.

set(SD_METAL OFF)
set(SD_VULKAN OFF)
set(SD_CUDA OFF)
set(SD_OPENCL OFF)
set(SD_FLASH_ATTN OFF)

if("metal" IN_LIST FEATURES)
set(SD_METAL ON)
endif()

if("vulkan" IN_LIST FEATURES)
set(SD_VULKAN ON)
endif()

if("cuda" IN_LIST FEATURES)
set(SD_CUDA ON)
endif()

if("opencl" IN_LIST FEATURES)
set(SD_OPENCL ON)
endif()

if("flash-attn" IN_LIST FEATURES)
set(SD_FLASH_ATTN ON)
endif()
Expand All @@ -69,10 +44,6 @@ vcpkg_cmake_configure(
-DSD_BUILD_EXAMPLES=OFF
-DSD_BUILD_SHARED_LIBS=OFF
-DSD_USE_SYSTEM_GGML=ON
-DSD_METAL=${SD_METAL}
-DSD_VULKAN=${SD_VULKAN}
-DSD_CUDA=${SD_CUDA}
-DSD_OPENCL=${SD_OPENCL}
-DSD_FLASH_ATTN=${SD_FLASH_ATTN}
MAYBE_UNUSED_VARIABLES
SD_FLASH_ATTN
Expand Down
Loading
Loading