diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index ad23f7dd757d1..815db6a2d8c20 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -255,11 +255,11 @@ effectiveStdenv.mkDerivation (
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because
       # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL || useVulkan) lib.platforms.darwin;
+      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
-      broken = (useMetalKit && !effectiveStdenv.isDarwin) || (useVulkan && effectiveStdenv.isDarwin);
+      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
diff --git a/flake.nix b/flake.nix
index ad2f9b2951036..dc4e503c33061 100644
--- a/flake.nix
+++ b/flake.nix
@@ -150,6 +150,7 @@
        packages =
          {
            default = config.legacyPackages.llamaPackages.llama-cpp;
+            vulkan = config.packages.default.override { useVulkan = true; };
          }
          // lib.optionalAttrs pkgs.stdenv.isLinux {
            opencl = config.packages.default.override { useOpenCL = true; };
@@ -157,7 +158,6 @@
            mpi-cpu = config.packages.default.override { useMpi = true; };
            mpi-cuda = config.packages.default.override { useMpi = true; };
-            vulkan = config.packages.default.override { useVulkan = true; };
          }
          // lib.optionalAttrs (system == "x86_64-linux") {
            rocm = config.legacyPackages.llamaPackagesRocm.llama-cpp;
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 4a30414df2fce..4e5eaff15110b 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -1091,7 +1091,10 @@ static void ggml_vk_print_gpu_info(size_t idx) {
     }
 }

-static void ggml_vk_instance_init() {
+static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
+static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions);
+
+void ggml_vk_instance_init() {
     if (vk_instance_initialized) {
         return;
     }
@@ -1100,28 +1103,42 @@ static void ggml_vk_instance_init() {
 #endif

     vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION };
-    const std::vector<const char*> layers = {
-#ifdef GGML_VULKAN_VALIDATE
-        "VK_LAYER_KHRONOS_validation",
-#endif
-    };
-    const std::vector<const char*> extensions = {
-#ifdef GGML_VULKAN_VALIDATE
-        "VK_EXT_validation_features",
-#endif
-    };
-    vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags(), &app_info, layers, extensions);
-#ifdef GGML_VULKAN_VALIDATE
-    const std::vector<vk::ValidationFeatureEnableEXT> features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
-    vk::ValidationFeaturesEXT validation_features = {
-        features_enable,
-        {},
-    };
-    validation_features.setPNext(nullptr);
-    instance_create_info.setPNext(&validation_features);
-    std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
-#endif
+    const std::vector<vk::ExtensionProperties> instance_extensions = vk::enumerateInstanceExtensionProperties();
+    const bool validation_ext = ggml_vk_instance_validation_ext_available(instance_extensions);
+    const bool portability_enumeration_ext = ggml_vk_instance_portability_enumeration_ext_available(instance_extensions);
+
+    std::vector<const char*> layers;
+
+    if (validation_ext) {
+        layers.push_back("VK_LAYER_KHRONOS_validation");
+    }
+    std::vector<const char*> extensions;
+    if (validation_ext) {
+        extensions.push_back("VK_EXT_validation_features");
+    }
+    if (portability_enumeration_ext) {
+        extensions.push_back("VK_KHR_portability_enumeration");
+    }
+    vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags{}, &app_info, layers, extensions);
+    if (portability_enumeration_ext) {
+        instance_create_info.flags |= vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR;
+    }
+
+    std::vector<vk::ValidationFeatureEnableEXT> features_enable;
+    vk::ValidationFeaturesEXT validation_features;
+
+    if (validation_ext) {
+        features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
+        validation_features = {
+            features_enable,
+            {},
+        };
+        validation_features.setPNext(nullptr);
+        instance_create_info.setPNext(&validation_features);
+
+        std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
+    }

     vk_instance.instance = vk::createInstance(instance_create_info);

     memset(vk_instance.initialized, 0, sizeof(bool) * GGML_VK_MAX_DEVICES);
@@ -1168,12 +1185,12 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
         vk_instance.devices[idx] = std::make_shared<vk_device>();
         ctx->device = vk_instance.devices[idx];
         ctx->device.lock()->physical_device = devices[dev_num];
-        std::vector<vk::ExtensionProperties> ext_props = ctx->device.lock()->physical_device.enumerateDeviceExtensionProperties();
+        const std::vector<vk::ExtensionProperties> ext_props = ctx->device.lock()->physical_device.enumerateDeviceExtensionProperties();

         bool maintenance4_support = false;

         // Check if maintenance4 is supported
-        for (auto properties : ext_props) {
+        for (const auto& properties : ext_props) {
             if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
                 maintenance4_support = true;
             }
@@ -1204,7 +1221,7 @@ static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
         bool fp16_storage = false;
         bool fp16_compute = false;

-        for (auto properties : ext_props) {
+        for (const auto& properties : ext_props) {
             if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
                 fp16_storage = true;
             } else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
@@ -5301,6 +5318,42 @@ GGML_CALL int ggml_backend_vk_reg_devices() {
     return vk_instance.device_indices.size();
 }

+// Extension availability
+static bool ggml_vk_instance_validation_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions) {
+#ifdef GGML_VULKAN_VALIDATE
+    bool validation_ext = false;
+    // Check for the validation features extension
+    for (const auto& properties : instance_extensions) {
+        if (strcmp("VK_EXT_validation_features", properties.extensionName) == 0) {
+            return true;
+        }
+    }
+    if (!validation_ext) {
+        std::cerr << "ggml_vulkan: WARNING: Instance extension VK_EXT_validation_features not found." << std::endl;
+    }
+#endif
+    return false;
+
+    UNUSED(instance_extensions);
+}
+static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector<vk::ExtensionProperties>& instance_extensions) {
+#ifdef __APPLE__
+    bool portability_enumeration_ext = false;
+    // Check for portability enumeration extension for MoltenVK support
+    for (const auto& properties : instance_extensions) {
+        if (strcmp("VK_KHR_portability_enumeration", properties.extensionName) == 0) {
+            return true;
+        }
+    }
+    if (!portability_enumeration_ext) {
+        std::cerr << "ggml_vulkan: WARNING: Instance extension VK_KHR_portability_enumeration not found." << std::endl;
+    }
+#endif
+    return false;
+
+    UNUSED(instance_extensions);
+}
+
 // checks

 #ifdef GGML_VULKAN_CHECK_RESULTS
diff --git a/ggml.c b/ggml.c
index 4ee2c5e11a002..d129df50548a3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -273,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #include <Accelerate/Accelerate.h>
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_VULKAN)
+#include "ggml-vulkan.h"
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)
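
For anyone reading the ggml-vulkan.cpp hunks in isolation, the instance-creation change boils down to a check-then-enable pattern: enumerate the loader's instance extensions first, and only request VK_KHR_portability_enumeration together with the eEnumeratePortabilityKHR flag when the extension is actually present (on MoltenVK this is what makes the non-conformant "portability" devices visible at all). Below is a minimal standalone sketch of that pattern against plain Vulkan-Hpp; the file name, the has_instance_extension helper, and the choice of VK_API_VERSION_1_2 are illustrative assumptions, not part of this patch.

// portability_check.cpp: hypothetical standalone demo of the pattern used in
// ggml_vk_instance_init() above. Build sketch: g++ -std=c++17 portability_check.cpp -lvulkan
#include <cstring>
#include <iostream>
#include <vector>
#include <vulkan/vulkan.hpp>

// Assumed helper, not from the patch: reports whether the loader advertises
// the named instance extension.
static bool has_instance_extension(const std::vector<vk::ExtensionProperties>& exts, const char* name) {
    for (const auto& props : exts) {
        if (strcmp(name, props.extensionName) == 0) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::vector<vk::ExtensionProperties> exts = vk::enumerateInstanceExtensionProperties();
    const bool portability = has_instance_extension(exts, "VK_KHR_portability_enumeration");

    vk::ApplicationInfo app_info{ "portability-demo", 1, nullptr, 0, VK_API_VERSION_1_2 };
    std::vector<const char*> layers;
    std::vector<const char*> extensions;
    if (portability) {
        extensions.push_back("VK_KHR_portability_enumeration");
    }

    // Extension list is finalized before the create-info captures it.
    vk::InstanceCreateInfo info(vk::InstanceCreateFlags{}, &app_info, layers, extensions);
    if (portability) {
        // Without this flag a recent MoltenVK loader may report
        // VK_ERROR_INCOMPATIBLE_DRIVER or hide the portability-subset devices.
        info.flags |= vk::InstanceCreateFlagBits::eEnumeratePortabilityKHR;
    }

    vk::Instance instance = vk::createInstance(info);
    std::cout << "physical devices visible: " << instance.enumeratePhysicalDevices().size() << std::endl;
    instance.destroy();
    return 0;
}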
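
On the Nix side of the patch, moving the vulkan attribute out of the lib.optionalAttrs pkgs.stdenv.isLinux set, together with dropping useVulkan from badPlatforms and broken in .devops/nix/package.nix, means the flake now exposes the output on every supported system rather than on Linux only. Assuming a local checkout of the repository, the new output should be buildable the usual way on macOS as well as Linux:

  nix build .#vulkan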