diff --git a/CMakeLists.txt b/CMakeLists.txt index f3bbfb58c2..557400862b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,6 +178,7 @@ option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF) option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE}) option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) +option(GGML_CPU_STATIC "ggml: build CPU backend as static library even with GGML_BACKEND_DL" OFF) set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC") @@ -186,6 +187,14 @@ set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline option(GGML_CPU "ggml: enable CPU backend" ON) option(GGML_SCHED_NO_REALLOC "ggml: disallow reallocations in ggml-alloc (for debugging)" OFF) +# When non-empty, this prefix is prepended to the OUTPUT_NAME of ggml, ggml-base +# and the backend libraries, so they land on disk as `lib<prefix>ggml*.{a,so,dll}`; +# the runtime backend loader is expected to look for the same prefix. CMake target +# names and the `find_package(ggml CONFIG)` package name are unchanged. Defaults to +# `qvac-speech-` on the speech branch to match the loader's compiled-in default in `ggml-backend-reg.cpp`. 
+set(GGML_LIB_OUTPUT_PREFIX "qvac-speech-" CACHE STRING + "ggml: prefix for built ggml library filenames") + # 3rd party libs / backends option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON) option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) @@ -294,6 +303,49 @@ include(GNUInstallDirs) add_subdirectory(src) +if (NOT GGML_LIB_OUTPUT_PREFIX STREQUAL "") + # Prepend GGML_LIB_OUTPUT_PREFIX to the on-disk OUTPUT_NAME of `target`. + # No-op when the target does not exist or is an INTERFACE/OBJECT library. + function(_ggml_apply_lib_output_prefix target) + if (NOT TARGET ${target}) + return() + endif() + get_target_property(_lop_type ${target} TYPE) + if (_lop_type STREQUAL "INTERFACE_LIBRARY" OR + _lop_type STREQUAL "OBJECT_LIBRARY") + return() + endif() + get_target_property(_lop_old ${target} OUTPUT_NAME) + if (NOT _lop_old OR _lop_old STREQUAL "_lop_old-NOTFOUND") + set(_lop_old ${target}) + endif() + set_target_properties(${target} PROPERTIES + OUTPUT_NAME "${GGML_LIB_OUTPUT_PREFIX}${_lop_old}") + endfunction() + + foreach (_lop_target ggml ggml-base) + _ggml_apply_lib_output_prefix(${_lop_target}) + endforeach() + if (DEFINED GGML_AVAILABLE_BACKENDS) + foreach (_lop_target ${GGML_AVAILABLE_BACKENDS}) + _ggml_apply_lib_output_prefix(${_lop_target}) + endforeach() + endif() + + # ggml-backend-reg.cpp (the runtime loader) is compiled into `ggml`, not + # `ggml-base`, so the PRIVATE prefix macro must be defined on `ggml` as well + # or the loader translation unit never sees it. + # NOTE(review): confirm ggml-backend-reg.cpp reads GGML_BACKEND_DL_PROJECT_PREFIX. + foreach (_lop_target ggml ggml-base) + if (TARGET ${_lop_target}) + target_compile_definitions(${_lop_target} PRIVATE + GGML_BACKEND_DL_PROJECT_PREFIX="${GGML_LIB_OUTPUT_PREFIX}") + endif() + endforeach() + + message(STATUS "ggml: lib output prefix '${GGML_LIB_OUTPUT_PREFIX}'") +endif() + # # tests and examples # @@ -324,6 +376,7 @@ set(GGML_PUBLIC_HEADERS include/ggml-cpp.h include/ggml-cuda.h include/ggml-opt.h + include/ggml-opencl.h include/ggml-metal.h include/ggml-rpc.h include/ggml-virtgpu.h diff --git a/cmake/ggml-config.cmake.in b/cmake/ggml-config.cmake.in index 91c9d5cd34..09e2ca9a9c 100644 --- a/cmake/ggml-config.cmake.in +++ b/cmake/ggml-config.cmake.in @@ -104,7 +104,7 @@ set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@") if(NOT TARGET ggml::ggml) find_package(Threads REQUIRED) - find_library(GGML_LIBRARY ggml + find_library(GGML_LIBRARY NAMES 
"${GGML_LIB_OUTPUT_PREFIX}ggml" ggml REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) @@ -112,25 +112,39 @@ if(NOT TARGET ggml::ggml) add_library(ggml::ggml UNKNOWN IMPORTED) set_target_properties(ggml::ggml PROPERTIES - IMPORTED_LOCATION "${GGML_LIBRARY}") + IMPORTED_LOCATION "${GGML_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}") - find_library(GGML_BASE_LIBRARY ggml-base + find_library(GGML_BASE_LIBRARY NAMES "${GGML_LIB_OUTPUT_PREFIX}ggml-base" ggml-base REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) add_library(ggml::ggml-base UNKNOWN IMPORTED) + set_property(TARGET ggml::ggml-base PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}") set_target_properties(ggml::ggml-base PROPERTIES IMPORTED_LOCATION "${GGML_BASE_LIBRARY}") + if(GGML_MAX_NAME) + set_property(TARGET ggml::ggml-base APPEND PROPERTY + INTERFACE_COMPILE_DEFINITIONS "GGML_MAX_NAME=${GGML_MAX_NAME}") + endif() + set(_ggml_all_targets "") - if (NOT GGML_BACKEND_DL) + # In hybrid mode (GGML_BACKEND_DL + GGML_CPU_STATIC), only the CPU backend + # is static and must still be exported to downstream consumers. 
+ if (NOT GGML_BACKEND_DL OR GGML_CPU_STATIC) foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS}) + string(REGEX MATCH "^ggml-cpu" _ggml_is_cpu_variant "${_ggml_backend}") + if (GGML_BACKEND_DL AND GGML_CPU_STATIC AND NOT _ggml_is_cpu_variant) + continue() + endif() string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}") string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx) - find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend} + find_library(${_ggml_backend_pfx}_LIBRARY + NAMES "${GGML_LIB_OUTPUT_PREFIX}${_ggml_backend}" "${_ggml_backend}" REQUIRED HINTS ${GGML_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 78853304d9..d619c21623 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -185,8 +185,10 @@ endif() # ggml -if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS) - message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS") +# GGML_BACKEND_DL works with static core when PIC is enabled below. + +if (GGML_CPU_STATIC AND GGML_CPU_ALL_VARIANTS) + message(FATAL_ERROR "GGML_CPU_STATIC is incompatible with GGML_CPU_ALL_VARIANTS") endif() add_library(ggml-base @@ -221,6 +223,10 @@ if (GGML_SCHED_NO_REALLOC) target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC) endif() +if (DEFINED GGML_MAX_NAME) + target_compile_definitions(ggml-base PUBLIC GGML_MAX_NAME=${GGML_MAX_NAME}) +endif() + add_library(ggml ggml-backend-dl.cpp ggml-backend-reg.cpp) @@ -245,10 +251,12 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") endif() function(ggml_add_backend_library backend) - if (GGML_BACKEND_DL) + # With GGML_CPU_STATIC, ggml-cpu* targets skip the MODULE path even when GGML_BACKEND_DL is on. + if (GGML_BACKEND_DL AND NOT (GGML_CPU_STATIC AND "${backend}" MATCHES "^ggml-cpu")) add_library(${backend} MODULE ${ARGN}) # write the shared library to the output directory - set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) + set_target_properties(${backend} PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) target_compile_definitions(${backend} PRIVATE 
GGML_BACKEND_DL) add_dependencies(ggml ${backend}) if (GGML_BACKEND_DIR) @@ -265,7 +273,7 @@ function(ggml_add_backend_library backend) target_link_libraries(${backend} PRIVATE ggml-base) target_include_directories(${backend} PRIVATE ..) - if (${BUILD_SHARED_LIBS}) + if (BUILD_SHARED_LIBS OR GGML_BACKEND_DL) target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD) target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED) endif() @@ -301,6 +309,9 @@ function(ggml_add_backend backend) string(TOUPPER "GGML_USE_${backend}" backend_use) target_compile_definitions(ggml PUBLIC ${backend_use}) endif() + if (GGML_CPU_STATIC AND "${backend}" STREQUAL "CPU") + target_compile_definitions(ggml PUBLIC GGML_USE_CPU) + endif() endif() endfunction() @@ -484,7 +495,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "visionOS") target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE) endif() -if (BUILD_SHARED_LIBS) +if (BUILD_SHARED_LIBS OR GGML_BACKEND_DL) foreach (target ggml-base ggml) set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${target} PRIVATE GGML_BUILD) diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp index 20b3a5482f..8fa8102436 100644 --- a/src/ggml-backend-reg.cpp +++ b/src/ggml-backend-reg.cpp @@ -462,9 +462,9 @@ static fs::path backend_filename_prefix() { #endif #else #ifdef _WIN32 - return fs::u8path("ggml-"); + return fs::u8path("qvac-speech-ggml-"); #else - return fs::u8path("libggml-"); + return fs::u8path("libqvac-speech-ggml-"); #endif #endif } @@ -553,6 +553,13 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, } } } + // Android app packaging can flatten native libraries into one directory. + // If loading from the requested subdirectory fails, retry by filename only + // and leave lookup to dlopen's default search path resolution. 
+ fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native(); + if (auto reg = get_reg().load_backend(filename, silent)) { + return reg; + } return nullptr; } diff --git a/src/ggml-opencl/ggml-opencl.cpp b/src/ggml-opencl/ggml-opencl.cpp index 7c2e4bc2f9..cb42e27af0 100644 --- a/src/ggml-opencl/ggml-opencl.cpp +++ b/src/ggml-opencl/ggml-opencl.cpp @@ -4353,15 +4353,6 @@ static ggml_backend_i ggml_backend_opencl_i = { }; ggml_backend_t ggml_backend_opencl_init(void) { - // qvac-parakeet patch: bail out cleanly when the OpenCL backend - // discovery saw zero usable devices. Upstream calls - // ggml_backend_reg_dev_get() unconditionally, which asserts on an - // empty device list. Parakeet's host code expects a nullable result - // from ggml_backend_opencl_init() (it falls back to CPU when the - // returned backend is null); the assertion makes that fallback path - // unreachable on hosts where ggml-opencl can't find any GPU it - // accepts (Adreno-only environments without an Adreno device, - // headless CI runners, etc.). 
ggml_backend_reg_t reg = ggml_backend_opencl_reg(); if (ggml_backend_reg_dev_count(reg) == 0) { return nullptr; @@ -6272,7 +6263,11 @@ static size_t ggml_backend_opencl_reg_device_count(ggml_backend_reg_t reg) { } static ggml_backend_dev_t ggml_backend_opencl_reg_device_get(ggml_backend_reg_t reg, size_t index) { - GGML_ASSERT(index < ggml_backend_opencl_reg_device_count(reg)); + size_t n = ggml_backend_opencl_reg_device_count(reg); + if (n == 0) { + return nullptr; + } + GGML_ASSERT(index < n); return &g_ggml_backend_opencl_devices[index]; diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp index 19e7fbdaae..9f1b42749e 100644 --- a/src/ggml-vulkan/ggml-vulkan.cpp +++ b/src/ggml-vulkan/ggml-vulkan.cpp @@ -23,8 +23,14 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); #include #include +#include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -864,6 +870,24 @@ struct vk_device_struct { bool allow_sysmem_fallback; bool disable_graph_optimize; + // Optional persistent VkPipelineCache. Enabled only when the caller + // sets GGML_VK_PIPELINE_CACHE_DIR to a non-empty path. When enabled, + // createPipelineCache is seeded from disk at init and getPipelineCacheData + // is written back from ggml_vk_load_shaders() and ggml_vk_cleanup(), so repeated + // ggml_backend_vk_init() invocations (and separate processes) skip the + // shader-compile wave that Vulkan normally pays on every cold + // command-buffer graph-build. When pipeline_cache is VK_NULL_HANDLE + // (default / opt-out / createPipelineCache failure) behaviour is identical to upstream + // (createComputePipeline takes VK_NULL_HANDLE, which is legal). + vk::PipelineCache pipeline_cache = VK_NULL_HANDLE; + std::string pipeline_cache_path; + // QVAC-17872 round-2: bytes already on disk for this cache. 
Used by + // the eager flush in ggml_vk_load_shaders and by ggml_vk_save_pipeline_cache + // to skip the disk write on pure cache-hit paths (warm runs where every + // pipeline came from the seed blob): if getPipelineCacheData().size() == this + // value, the cache content is assumed unchanged (size-only check) and nothing is persisted. + size_t pipeline_cache_last_size = 0; + std::unique_ptr<vk_memory_logger> memory_logger; ~vk_device_struct() { @@ -888,10 +912,71 @@ struct vk_device_struct { device.destroyDescriptorSetLayout(dsl); + // Destroy the VkPipelineCache handle here if it's still alive. The + // on-disk persistence happens earlier, in ggml_vk_cleanup(), because + // this destructor is not reliably reached at process exit: pipelines + // and helpers hold shared_ptr<vk_device_struct> refs that keep the + // refcount above 0 until well after the Vulkan dispatcher is gone. + if (pipeline_cache) { + device.destroyPipelineCache(pipeline_cache); + pipeline_cache = VK_NULL_HANDLE; + } + device.destroy(); } }; +// Flush the optional persistent pipeline cache to disk. Called from +// ggml_vk_cleanup() while the device shared_ptr is still alive and the +// Vulkan dispatcher is still valid. Safe to call multiple times per device +// (the write is atomic via tmp + rename; idempotent). No-op when persistent +// caching was not enabled at init time. +static void ggml_vk_save_pipeline_cache(vk_device & device) { + if (!device || !device->pipeline_cache || device->pipeline_cache_path.empty()) { + return; + } + try { + const std::vector<uint8_t> blob = device->device.getPipelineCacheData(device->pipeline_cache); + if (blob.empty()) { + return; + } + // QVAC-17872 round-2: skip the disk write if the cache blob has the + // same size in bytes as the one last read from / written to disk + // (size is used as a cheap "unchanged" check). Avoids re-writing + // 1 MB on every cleanup of a process that didn't compile any new + // pipelines (warm runs). The eager-flush path in ggml_vk_load_shaders + // uses the same pipeline_cache_last_size bookkeeping. 
+ if (blob.size() == device->pipeline_cache_last_size) { + return; + } + const std::filesystem::path final_path(device->pipeline_cache_path); + std::filesystem::path tmp_path = final_path; + tmp_path += ".tmp"; + std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc); + if (!out) { + return; + } + out.write(reinterpret_cast<const char *>(blob.data()), + static_cast<std::streamsize>(blob.size())); + out.close(); + if (out.good()) { + std::error_code ec; + std::filesystem::rename(tmp_path, final_path, ec); + if (!ec) { + device->pipeline_cache_last_size = blob.size(); + } else { + std::error_code ignore; + std::filesystem::remove(tmp_path, ignore); + } + } else { + std::error_code ignore; + std::filesystem::remove(tmp_path, ignore); + } + } catch (const std::exception &) { + // best-effort; silently drop the write + } +} + void vk_command_pool::init(vk_device& device, vk_queue *q_) { cmd_buffers.clear(); q = q_; @@ -2206,7 +2291,10 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin #endif try { - pipeline->pipeline = device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value; + // device->pipeline_cache is VK_NULL_HANDLE when persistent caching is + // opted out or its init failed; VK treats that as "no cache" — same + // as before this patch. + pipeline->pipeline = device->device.createComputePipeline(device->pipeline_cache, compute_pipeline_create_info).value; } catch (const vk::SystemError& e) { std::cerr << "ggml_vulkan: Compute pipeline creation failed for " << pipeline->name << std::endl; std::cerr << "ggml_vulkan: " << e.what() << std::endl; @@ -4783,6 +4871,53 @@ static void ggml_vk_load_shaders(vk_device& device) { for (auto &c : compiles) { c.wait(); } + + // QVAC-17872 round-2: persist the pipeline cache eagerly when this + // load_shaders call actually GREW the cache (i.e. compiled at least + // one pipeline whose SPIR-V was not already in the seed blob). 
+ // Without this, lazy-compile work done by + // ggml_pipeline_request_descriptor_sets during a long-running graph + // compute is only flushed in ggml_vk_cleanup at backend free time — + // a process crash in between throws away the entire cold-compile + // wave and the next process pays it again. + // + // Crucially, on a warm run with a populated seed blob, every + // pipeline still goes through createComputePipeline → compiles is + // non-empty → but getPipelineCacheData().size() == seed size, so we + // skip the disk write (an unconditional flush cost +90 ms WALL). Any + // other size — larger, or smaller after a rejected stale seed — is persisted. + if (!compiles.empty() && device->pipeline_cache && !device->pipeline_cache_path.empty()) { + try { + const std::vector<uint8_t> blob = device->device.getPipelineCacheData(device->pipeline_cache); + if (!blob.empty() && blob.size() != device->pipeline_cache_last_size) { + const std::filesystem::path final_path(device->pipeline_cache_path); + std::filesystem::path tmp_path = final_path; + tmp_path += ".tmp"; + std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc); + if (out) { + out.write(reinterpret_cast<const char *>(blob.data()), + static_cast<std::streamsize>(blob.size())); + out.close(); + if (out.good()) { + std::error_code ec; + std::filesystem::rename(tmp_path, final_path, ec); + if (!ec) { + device->pipeline_cache_last_size = blob.size(); + } else { + std::error_code ignore; + std::filesystem::remove(tmp_path, ignore); + } + } else { + std::error_code ignore; + std::filesystem::remove(tmp_path, ignore); + } + } + } + } catch (const std::exception &) { + // best-effort; on any failure we silently fall back to the + // ggml_vk_cleanup-time flush. 
+ } + } } static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch); @@ -5507,6 +5642,87 @@ static vk_device ggml_vk_get_device(size_t idx) { descriptor_set_layout_create_info.setPNext(&dslbfci); device->dsl = device->device.createDescriptorSetLayout(descriptor_set_layout_create_info); + // ------------------------------------------------------------------- + // Persistent VkPipelineCache (explicit opt-in only). + // + // Enabled by setting GGML_VK_PIPELINE_CACHE_DIR to a non-empty + // directory path. When unset or empty the feature is off and + // behaviour is byte-identical to upstream ggml-vulkan. + // + // No auto-discovery of $XDG_CACHE_HOME or $HOME: ggml is a library + // distributed through package managers and consumed by applications + // that should decide whether and where to persist Vulkan artefacts. + // Writing to the user's home directory without being asked is a + // side effect library consumers cannot see from the API surface. + // + // Filename keyed on vendorID/deviceID/driverVersion; Vulkan itself + // validates the blob header and silently ignores stale data if the + // shader bundle or driver changed. + // + // The cache is consulted by createComputePipeline in + // ggml_vk_create_pipeline_func and flushed back to disk eagerly from + // ggml_vk_load_shaders() and again from ggml_vk_cleanup(). A cold + // first-process graph dispatch that used to pay seconds of shader + // compile drops to tens of ms on drivers without an aggressive + // per-app system cache (Mesa/RADV, Android Adreno/Mali, fresh NVIDIA installs, containers). + // See: QVAC-17872 for measured cold->warm deltas. 
+ // ------------------------------------------------------------------- + { + const char * env_dir = getenv("GGML_VK_PIPELINE_CACHE_DIR"); + + std::string dir; + if (env_dir != nullptr && *env_dir != '\0') { + dir = env_dir; + } + + if (!dir.empty()) { + std::error_code mkec; + std::filesystem::create_directories(dir, mkec); + (void) mkec; // on failure we still try createPipelineCache with an empty seed + + char fname[64]; + snprintf(fname, sizeof(fname), + "%04x-%04x-%08x.pcache", + (unsigned) device->properties.vendorID, + (unsigned) device->properties.deviceID, + (unsigned) device->properties.driverVersion); + device->pipeline_cache_path = (std::filesystem::path(dir) / fname).string(); + + std::vector<uint8_t> seed; + { + std::ifstream in(device->pipeline_cache_path, std::ios::binary | std::ios::ate); + if (in) { + const std::streamoff n = in.tellg(); + if (n > 0) { + seed.resize(static_cast<size_t>(n)); + in.seekg(0, std::ios::beg); + in.read(reinterpret_cast<char *>(seed.data()), static_cast<std::streamsize>(seed.size())); + if (!in) seed.clear(); + } + } + } + + vk::PipelineCacheCreateInfo pci( + {}, + seed.size(), + seed.empty() ? nullptr : seed.data()); + try { + device->pipeline_cache = device->device.createPipelineCache(pci); + // QVAC-17872 round-2: seed size matches the disk blob; + // if the eager-flush path observes the same size after + // a load_shaders call, it's a pure cache-hit run and + // the disk write is skipped. The driver may rewrite + // header fields that change blob.size() vs file size + // by a few bytes — that's still a one-time size change and + // we'll write the new size, then steady-state from there. 
+ device->pipeline_cache_last_size = seed.size(); + } catch (const vk::SystemError &) { + device->pipeline_cache = VK_NULL_HANDLE; + device->pipeline_cache_path.clear(); + } + } + } + ggml_vk_load_shaders(device); // Only use transfer queue on AMD non-GCN, when the graphics queue is not enabled @@ -13357,6 +13573,13 @@ static void ggml_vk_graph_cleanup(ggml_backend_vk_context * ctx) { // Clean up on backend free static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) { VK_LOG_DEBUG("ggml_vk_cleanup(" << ctx->name << ")"); + + // Persist the optional on-disk pipeline cache while the device shared_ptr + // and the Vulkan dispatcher are still valid. Doing this from + // ~vk_device_struct() is unreliable: pipelines and helpers hold + // shared_ptr<vk_device_struct> refs that keep the refcount non-zero by + // typical process-exit time, so the device destructor often never runs. + ggml_vk_save_pipeline_cache(ctx->device); // discard any unsubmitted command buffers ctx->compute_ctx.reset(); // wait for any pending command buffers to finish @@ -15895,7 +16118,8 @@ static size_t ggml_backend_vk_reg_get_device_count(ggml_backend_reg_t reg) { } static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, size_t device) { - static std::vector<ggml_backend_dev_t> devices; + static std::vector<std::unique_ptr<ggml_backend_device>> devices; + static std::vector<std::unique_ptr<ggml_backend_vk_device_context>> contexts; static bool initialized = false; @@ -15905,7 +16129,7 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, if (!initialized) { const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? 
atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) { - ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context; + auto ctx = std::make_unique<ggml_backend_vk_device_context>(); char desc[256]; ggml_backend_vk_get_device_description(i, desc, sizeof(desc)); ctx->device = i; @@ -15914,18 +16138,20 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu; ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i); ctx->op_offload_min_batch_size = min_batch_size; - devices.push_back(new ggml_backend_device { + auto dev = std::make_unique<ggml_backend_device>(ggml_backend_device { /* .iface = */ ggml_backend_vk_device_i, /* .reg = */ reg, - /* .context = */ ctx, + /* .context = */ ctx.get(), }); + contexts.push_back(std::move(ctx)); + devices.push_back(std::move(dev)); } initialized = true; } } GGML_ASSERT(device < devices.size()); - return devices[device]; + return devices[device].get(); } static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {