Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions packages/lib-infer-diffusion/addon/src/model-interface/SdModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <cstring>
#include <filesystem>
#include <sstream>
#include <system_error>
#include <vector>

#define STB_IMAGE_IMPLEMENTATION
#include <stb_image.h>
Expand Down Expand Up @@ -38,6 +40,158 @@ thread_local ProgressCtx tl_progressCtx;
// sd_abort_cb_data when multiple SdModel instances could coexist.
thread_local const SdModel* tl_abortModel = nullptr;

// Maps a ggml backend device type to the short upper-case label used in the
// diagnostic log lines. Any value without a dedicated case yields "UNKNOWN".
std::string backendDeviceTypeToString(enum ggml_backend_dev_type type) {
  // Start from the fallback so unhandled enumerators need no extra branch.
  const char* label = "UNKNOWN";
  switch (type) {
  case GGML_BACKEND_DEVICE_TYPE_CPU:
    label = "CPU";
    break;
  case GGML_BACKEND_DEVICE_TYPE_GPU:
    label = "GPU";
    break;
  case GGML_BACKEND_DEVICE_TYPE_IGPU:
    label = "IGPU";
    break;
  case GGML_BACKEND_DEVICE_TYPE_ACCEL:
    label = "ACCEL";
    break;
  default:
    break;
  }
  return std::string(label);
}

// Maps a stable-diffusion backend preference to the lower-case label used in
// the diagnostic log lines. Any value without a dedicated case yields
// "unknown".
std::string preferredBackendToString(enum sd_backend_preference_t pref) {
  // Start from the fallback so unhandled enumerators need no extra branch.
  const char* label = "unknown";
  switch (pref) {
  case SD_BACKEND_PREF_AUTO:
    label = "auto";
    break;
  case SD_BACKEND_PREF_CPU:
    label = "cpu";
    break;
  case SD_BACKEND_PREF_GPU:
    label = "gpu";
    break;
  case SD_BACKEND_PREF_OPENCL:
    label = "opencl";
    break;
  default:
    break;
  }
  return std::string(label);
}

void logBackendRegistrySnapshot() {
using Priority = qvac_lib_inference_addon_cpp::logger::Priority;

const size_t regCount = ggml_backend_reg_count();
const size_t devCount = ggml_backend_dev_count();
QLOG_IF(
Priority::INFO,
"GGML backend registry snapshot: " + std::to_string(regCount) +
" registry entries, " + std::to_string(devCount) + " devices");

for (size_t i = 0; i < regCount; ++i) {
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
const char* regName = reg ? ggml_backend_reg_name(reg) : nullptr;
const size_t regDevCount = reg ? ggml_backend_reg_dev_count(reg) : 0;
QLOG_IF(
Priority::INFO,
"GGML backend registry[" + std::to_string(i) + "]: name='" +
std::string(regName ? regName : "<null>") +
"', devices=" + std::to_string(regDevCount));
}

for (size_t i = 0; i < devCount; ++i) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
if (!dev) {
QLOG_IF(
Priority::WARNING,
"GGML backend device[" + std::to_string(i) + "]: null device handle");
continue;
}

const char* name = ggml_backend_dev_name(dev);
const char* desc = ggml_backend_dev_description(dev);
const auto type = ggml_backend_dev_type(dev);
size_t memFree = 0;
size_t memTotal = 0;
ggml_backend_dev_memory(dev, &memFree, &memTotal);

ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
const char* regName = reg ? ggml_backend_reg_name(reg) : nullptr;

QLOG_IF(
Priority::INFO,
"GGML backend device[" + std::to_string(i) + "]: name='" +
std::string(name ? name : "<null>") + "', desc='" +
std::string(desc ? desc : "<null>") +
"', type=" + backendDeviceTypeToString(type) + ", reg='" +
std::string(regName ? regName : "<null>") +
"', mem_free=" + std::to_string(memFree) +
", mem_total=" + std::to_string(memTotal));
}
}

// Logs diagnostics about the directory backend modules are loaded from:
// whether the path exists, whether it is a directory, and which entries look
// like qvac diffusion GGML backend modules (file name starts with
// "libqvac-diffusion-ggml-" and has the ".so" extension).
//
// backendsDirPath: directory that is about to be handed to the ggml backend
//                  loader.
//
// This helper is purely diagnostic, so every filesystem query uses the
// std::error_code overloads: a failure is logged as a warning and ends the
// snapshot instead of throwing into the caller.
void logBackendModulePathSnapshot(
    const std::filesystem::path& backendsDirPath) {
  using Priority = qvac_lib_inference_addon_cpp::logger::Priority;

  std::error_code ec;
  const bool exists = std::filesystem::exists(backendsDirPath, ec);
  QLOG_IF(
      Priority::INFO,
      "Backend module path exists=" + std::string(exists ? "true" : "false") +
          " path='" + backendsDirPath.string() + "'");
  if (ec) {
    QLOG_IF(
        Priority::WARNING,
        "Backend module path existence check error: " + ec.message());
    return;
  }
  if (!exists) {
    return;
  }

  const bool isDir = std::filesystem::is_directory(backendsDirPath, ec);
  QLOG_IF(
      Priority::INFO,
      "Backend module path is_directory=" +
          std::string(isDir ? "true" : "false"));
  if (ec || !isDir) {
    if (ec) {
      QLOG_IF(
          Priority::WARNING,
          "Backend module path type check error: " + ec.message());
    }
    return;
  }

  // A range-for over directory_iterator advances with operator++, which
  // throws on error, and a failed construction yields an empty range so an
  // in-loop error check would never run. Drive the iterator by hand so both
  // failure points are reported through ec.
  std::filesystem::directory_iterator it(backendsDirPath, ec);
  if (ec) {
    QLOG_IF(
        Priority::WARNING,
        "Backend module path iteration error: " + ec.message());
    return;
  }

  std::vector<std::string> entries;
  const std::filesystem::directory_iterator end;
  while (it != end) {
    const std::filesystem::path& entryPath = it->path();
    const auto filename = entryPath.filename().string();
    // rfind(prefix, 0) == 0 is a C++17-compatible "starts with" test.
    if (filename.rfind("libqvac-diffusion-ggml-", 0) == 0 &&
        entryPath.extension() == ".so") {
      entries.push_back(filename);
    }

    it.increment(ec);
    if (ec) {
      QLOG_IF(
          Priority::WARNING,
          "Backend module path iteration error: " + ec.message());
      // Stop scanning but still report whatever was collected so far.
      break;
    }
  }

  if (entries.empty()) {
    QLOG_IF(
        Priority::WARNING,
        "No qvac diffusion GGML backend modules found under: " +
            backendsDirPath.string());
    return;
  }

  // Join the matching file names into a single comma-separated log line.
  std::ostringstream oss;
  for (size_t i = 0; i < entries.size(); ++i) {
    if (i > 0) {
      oss << ", ";
    }
    oss << entries[i];
  }
  QLOG_IF(
      Priority::INFO,
      "Detected qvac diffusion GGML backend modules: " + oss.str());
}

void sdProgressCallback(int step, int steps, float /*time*/, void* /*data*/) {
if (!tl_progressCtx.job || !tl_progressCtx.job->progressCallback)
return;
Expand Down Expand Up @@ -165,12 +319,14 @@ void SdModel::load() {
QLOG_IF(
Priority::INFO,
"Loading GPU backends from: " + backendsDirPath.string());
logBackendModulePathSnapshot(backendsDirPath);
ggml_backend_load_all_from_path(backendsDirPath.string().c_str());
} else {
QLOG_IF(Priority::INFO, "Loading GPU backends from default path");
ggml_backend_load_all();
}
backendsLoaded = true;
logBackendRegistrySnapshot();
}
}
#endif
Expand Down Expand Up @@ -198,6 +354,12 @@ void SdModel::load() {
params.preferred_gpu_backend = SD_BACKEND_PREF_GPU;
}

QLOG_IF(
qvac_lib_inference_addon_cpp::logger::Priority::INFO,
"Preferred backend passed to stable-diffusion: " +
preferredBackendToString(params.preferred_gpu_backend) + " (" +
std::to_string(static_cast<int>(params.preferred_gpu_backend)) + ")");

#if defined(__APPLE__)
// The ggml Metal backend does not fully support GGML_OP_NORM for
// non-contiguous tensors (the CLIP text encoder hits this path).
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h
index 51b2b32..8da9adf 100644
--- a/include/stable-diffusion.h
+++ b/include/stable-diffusion.h
@@ -331,10 +331,12 @@ typedef struct sd_ctx_t sd_ctx_t;

typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
+typedef bool (*sd_abort_cb_t)(void* data);
typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy, void* data);

SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
+SD_API void sd_set_abort_callback(sd_abort_cb_t cb, void* data);
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
SD_API int32_t sd_get_num_physical_cores();
SD_API const char* sd_get_system_info();
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index d769d45..d51f039 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -2193,6 +2193,9 @@ public:
int showstep = std::abs(step);
pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep);
// LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000);
+ if (sd_abort_requested()) {
+ return (ggml_tensor*)nullptr;
+ }
}
return denoised;
};
diff --git a/src/util.cpp b/src/util.cpp
index a94cfd9..343815d 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -270,6 +270,9 @@ int32_t sd_get_num_physical_cores() {
static sd_progress_cb_t sd_progress_cb = nullptr;
void* sd_progress_cb_data = nullptr;

+static sd_abort_cb_t sd_abort_cb = nullptr;
+static void* sd_abort_cb_data = nullptr;
+
static sd_preview_cb_t sd_preview_cb = nullptr;
static void* sd_preview_cb_data = nullptr;
preview_t sd_preview_mode = PREVIEW_NONE;
@@ -423,6 +426,15 @@ void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
sd_progress_cb = cb;
sd_progress_cb_data = data;
}
+
+void sd_set_abort_callback(sd_abort_cb_t cb, void* data) {
+ sd_abort_cb = cb;
+ sd_abort_cb_data = data;
+}
+
+bool sd_abort_requested() {
+ return sd_abort_cb && sd_abort_cb(sd_abort_cb_data);
+}
void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode, int interval, bool denoised, bool noisy, void* data) {
sd_preview_cb = cb;
sd_preview_cb_data = data;
diff --git a/src/util.h b/src/util.h
index 7dee7bf..254041e 100644
--- a/src/util.h
+++ b/src/util.h
@@ -69,6 +69,7 @@ protected:
std::string path_join(const std::string& p1, const std::string& p2);
std::vector<std::string> split_string(const std::string& str, char delimiter);
void pretty_progress(int step, int steps, float time);
+bool sd_abort_requested();

void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index d769d45..db3f242 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -2203,7 +2203,11 @@ public:
control_net->free_control_ctx();
control_net->free_compute_buffer();
}
- diffusion_model->free_compute_buffer();
+ // Upstream bug: abort/failure path freed the wrong model's compute
+ // buffer (diffusion_model instead of work_diffusion_model). The
+ // success path at line ~2218 frees work_diffusion_model -- this must
+ // match, otherwise sd_ctx state is corrupted and reuse segfaults.
+ work_diffusion_model->free_compute_buffer();
return NULL;
}

@@ -3796,6 +3800,13 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g

size_t t2 = ggml_time_ms();

+ // When generate_image_internal() returns NULL (abort or failure),
+ // work_ctx is never freed inside that function -- it only frees on
+ // the success path. Free it here to avoid leaking the ggml context.
+ if (result_images == nullptr) {
+ ggml_free(work_ctx);
+ }
+
LOG_INFO("generate_image completed in %.2fs", (t2 - t0) * 1.0f / 1000);

return result_images;

Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# stable-diffusion.cpp vcpkg overlay port
#
# Builds the stable-diffusion.cpp inference library and links against the
# system-installed ggml (provided by the separate ggml overlay port).
#
# Installed artefacts:
# include/stable-diffusion.h (main C API)
# lib/libstable-diffusion.a (static library)
# share/stable-diffusion-cpp/ (CMake package config)
#
# GPU backend selection is handled at runtime via ggml's backend registry.
# The sd-generic-backend-init patch replaces SD's backend-specific init
# with ggml_backend_init_by_type() which works with both statically linked
# and dynamically loaded backends.
#
# Pinned to release tag master-514-5792c66 (2026-03-01).
# Fetch the pinned commit and apply the local patches listed under PATCHES.
# Of these, abort-callback.patch adds sd_set_abort_callback() and an abort
# check in the sampling step, and fix-failure-path-cleanup.patch frees
# work_diffusion_model's compute buffer and the work ggml context when
# generation aborts or fails.
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO tetherto/qvac-ext-stable-diffusion.cpp
REF 5792c668798083f9f6d57dac66fbc62ddfdac405
SHA512 9bdf945d27ea24d9ea8218a7b875b6d1346711122723453840f4648cd862de3be28e37736ce0ef46ed304cbe810593dfa4264eec969c9e0c8dafb854298280f7
HEAD_REF master
PATCHES
sd-generic-backend-init.patch
sd-android-vulkan-diagnostics.patch
abort-callback.patch
fix-failure-path-cleanup.patch
)

# Flash attention is off unless the port's "flash-attn" feature is selected.
set(SD_FLASH_ATTN OFF)

if("flash-attn" IN_LIST FEATURES)
set(SD_FLASH_ATTN ON)
endif()

# Only build Release — debug builds are not needed for the prebuild and can
# fail with MSVC iterator-debug-level mismatches.
set(VCPKG_BUILD_TYPE release)

# --- Configure & build ---
# Static library only, no example binaries, linked against the ggml that is
# already installed rather than a bundled copy.
# MAYBE_UNUSED_VARIABLES lists SD_FLASH_ATTN so configure does not complain
# if the upstream project never reads that option.
vcpkg_cmake_configure(
SOURCE_PATH "${SOURCE_PATH}"
DISABLE_PARALLEL_CONFIGURE
OPTIONS
-DSD_BUILD_EXAMPLES=OFF
-DSD_BUILD_SHARED_LIBS=OFF
-DSD_USE_SYSTEM_GGML=ON
-DSD_FLASH_ATTN=${SD_FLASH_ATTN}
MAYBE_UNUSED_VARIABLES
SD_FLASH_ATTN
)

vcpkg_cmake_install()

# --- CMake package config ---
# Upstream does not export a CMake config, so we ship our own that defines
# stable-diffusion::stable-diffusion with ggml as a transitive dependency.
file(INSTALL
"${CMAKE_CURRENT_LIST_DIR}/stable-diffusion-cppConfig.cmake"
"${CMAKE_CURRENT_LIST_DIR}/stable-diffusion-cppConfigVersion.cmake"
DESTINATION "${CURRENT_PACKAGES_DIR}/share/stable-diffusion-cpp"
)

# --- Cleanup ---
# NOTE(review): with VCPKG_BUILD_TYPE set to release above there is presumably
# no debug/ tree, in which case these removals do nothing — confirm.
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")
file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share")

# NOTE(review): presumably required because the release-only build produces
# no debug binaries for vcpkg's post-build check to pair up — confirm.
set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled)

# Ship the usage text and the upstream licence alongside the package.
file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}")
vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")

Loading
Loading