From 3f9792f48cd022f2be9ea1dc792583fd8056571b Mon Sep 17 00:00:00 2001 From: Zbigniew Herman Date: Tue, 2 Jun 2026 10:58:26 +0200 Subject: [PATCH 1/3] ggml-backend-reg: Adreno-aware OpenCL backend selection on Android Port the Android GPU backend-selection policy from qvac-fabric-llm.cpp's ggml fork into the speech ggml fork, so the speech stack (whisper / parakeet / tts) selects GPU backends the same way the LLM stack does. Problem: ggml_backend_load_all_from_path() loaded Vulkan AND OpenCL unconditionally. On Adreno, ggml registers both for the same GPU and Vulkan loads first, so whisper.cpp's default gpu_device=0 lands on the Adreno Vulkan driver, which SIGSEGVs in vkCmdBindPipeline during ggml compute (observed on Samsung S25 Ultra, device-farm run 26646220900). On non-Adreno GPUs (e.g. Mali) Vulkan is the correct, stable path and OpenCL must not be used. Fix (mirrors fabric-llm): on Android, after loading Vulkan, use it to detect the GPU and decide whether to load OpenCL: - not Adreno -> skip OpenCL (Vulkan/CPU), e.g. Mali keeps Vulkan - Adreno > 700 -> load OpenCL (Adreno's stable ggml path) - Adreno 601-700 -> unload Vulkan, CPU only (both backends buggy) - Adreno <= 600 -> load OpenCL (unchanged fall-through) Implemented via ggml_backend_reg_by_name("vulkan") + ggml_backend_min_adreno_version() (using the public ggml_backend_reg_dev_* / ggml_backend_dev_description APIs) + ggml_backend_unload(). Off Android the behaviour is unchanged and byte-identical: `load_opencl` stays true and OpenCL is loaded in exactly the same position as before (right after virtgpu, before hexagon). The whole Adreno block is `#ifdef __ANDROID__`. The pure GPU-string parser is extracted to a header (src/ggml-adreno.h, inline, dependency-free) so it can be unit-tested without a GPU; it also lowercases the description (a small hardening over fabric-llm's raw substring check). 
New tests/test-adreno-version.cpp covers 18 cases (Adreno variants, Mali/NVIDIA/AMD/Apple/Intel/llvmpipe non-Adreno, empty, and unparseable); wired into ctest as test-adreno-version (always built, no ggml link). Validation (x64-linux): - ggml lib builds clean (ggml-backend-reg.cpp.o) - ggml-backend-reg.cpp -D__ANDROID__ -fsyntax-only: clean (Android block compiles) - test-adreno-version: 18/18 pass via ctest The Adreno runtime selection itself is validated end-to-end on the device farm via transcription-whispercpp's GPU test (Samsung S25 -> OpenCL). Co-authored-by: Cursor --- src/ggml-adreno.h | 50 +++++++++++++++++++++++++++ src/ggml-backend-reg.cpp | 65 ++++++++++++++++++++++++++++++++++- tests/CMakeLists.txt | 12 +++++++ tests/test-adreno-version.cpp | 62 +++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 src/ggml-adreno.h create mode 100644 tests/test-adreno-version.cpp diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h new file mode 100644 index 0000000000..054490591a --- /dev/null +++ b/src/ggml-adreno.h @@ -0,0 +1,50 @@ +#pragma once + +// Adreno GPU detection helper, shared between the Android backend-selection +// logic in ggml-backend-reg.cpp and its unit test (tests/test-adreno-version.cpp). +// +// Header-only (inline) and dependency-free so the unit test can exercise the +// pure parsing logic without linking the ggml runtime or needing a GPU. +// +// Mirrors the Adreno-version policy used by qvac-fabric-llm.cpp's ggml fork so +// the speech stack (whisper / parakeet / tts) selects GPU backends the same +// way the LLM stack does: on Android, detect the GPU through Vulkan (present +// on virtually every Android GPU) and only fall back to OpenCL for Adreno, +// whose Vulkan compute path is unstable. + +#include +#include +#include +#include + +// Parse the Adreno GPU generation from a ggml device description. +// e.g. 
"Adreno (TM) 830" -> 830, "Adreno 730" -> 730 +// Returns: +// > 0 : the parsed Adreno generation number +// -1 : the description is not an Adreno GPU +// -3 : the description is an Adreno GPU but the version failed to parse +// +// The description is lowercased before matching so "Adreno"/"ADRENO"/"adreno" +// are all recognised (a small hardening over the raw substring check in +// qvac-fabric-llm.cpp; it never produces a false negative for Adreno). +inline int ggml_adreno_version_from_description(const std::string & gpu_description) { + std::string lowered = gpu_description; + std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + + if (lowered.find("dreno") == std::string::npos) { + return -1; + } + + static const std::regex digits_regex(R"((\d+))"); + std::smatch matches; + if (std::regex_search(lowered, matches, digits_regex) && matches.size() > 1) { + try { + return std::stoi(matches[1].str()); + } catch (const std::exception &) { + return -3; + } + } + return -3; +} diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp index 5ac3549dfc..68f272b00b 100644 --- a/src/ggml-backend-reg.cpp +++ b/src/ggml-backend-reg.cpp @@ -2,9 +2,11 @@ #include "ggml-backend.h" #include "ggml-backend-dl.h" #include "ggml-impl.h" +#include "ggml-adreno.h" #include #include #include +#include #include #include #include @@ -687,6 +689,35 @@ void ggml_backend_load_all() { ggml_backend_load_all_from_path(nullptr); } +#ifdef __ANDROID__ +namespace { +// Smallest Adreno generation among the GPU devices a (Vulkan) backend exposes, +// or a negative sentinel: -2 if `reg` is null, -1 if no Adreno GPU is present. +// Vulkan is used as the probe because it is present on virtually every Android +// GPU, so the GPU can be identified before deciding whether to load OpenCL. +// Mirrors qvac-fabric-llm.cpp's ggml fork (the LLM stack's backend selection). 
+int ggml_backend_min_adreno_version(ggml_backend_reg_t reg) { + if (reg == nullptr) { + return -2; + } + int min_found = std::numeric_limits::max(); + for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) { + ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i); + if (dev == nullptr) { + continue; + } + const char * description = ggml_backend_dev_description(dev); + GGML_LOG_INFO("%s: found device description: %s\n", __func__, description ? description : "(null)"); + const int dev_adreno_version = ggml_adreno_version_from_description(description ? description : ""); + if (dev_adreno_version > 0) { + min_found = std::min(min_found, dev_adreno_version); + } + } + return (min_found < std::numeric_limits::max()) ? min_found : -1; +} +} // namespace +#endif // __ANDROID__ + void ggml_backend_load_all_from_path(const char * dir_path) { #ifdef NDEBUG bool silent = true; @@ -704,7 +735,39 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path); ggml_backend_load_best("virtgpu", silent, dir_path); - ggml_backend_load_best("opencl", silent, dir_path); + + // OpenCL is only useful (and stable) for ggml on Adreno GPUs; on every + // other GPU the Adreno-tuned OpenCL kernels are either unsupported or buggy. + // On Android, use the already-loaded Vulkan backend to detect the GPU and + // only keep OpenCL for an Adreno that benefits from it. This mirrors + // qvac-fabric-llm.cpp's ggml fork so the speech stack selects backends the + // same way the LLM stack does. Off Android behaviour is unchanged: OpenCL is + // loaded unconditionally here. On Android without a Vulkan backend, OpenCL is skipped. 
+ bool load_opencl = true; +#ifdef __ANDROID__ + { + ggml_backend_reg_t vulkan_backend = ggml_backend_reg_by_name("vulkan"); + const int min_adreno_version = ggml_backend_min_adreno_version(vulkan_backend); + if (min_adreno_version <= 0) { + GGML_LOG_INFO("%s: no Adreno GPU detected (%d); skipping OpenCL, relying on Vulkan/CPU\n", + __func__, min_adreno_version); + load_opencl = false; + } else if (min_adreno_version > 700) { + GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version); + } else if (min_adreno_version > 600) { + GGML_LOG_INFO("%s: Adreno %d detected; removing Vulkan and relying on CPU only\n", + __func__, min_adreno_version); + if (vulkan_backend != nullptr) { + ggml_backend_unload(vulkan_backend); + } + load_opencl = false; + } + } +#endif // __ANDROID__ + if (load_opencl) { + ggml_backend_load_best("opencl", silent, dir_path); + } + ggml_backend_load_best("hexagon", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path); ggml_backend_load_best("openvino", silent, dir_path); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7c28e344c5..cbaf33c57c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -168,6 +168,18 @@ target_link_libraries(${TEST_TARGET} PRIVATE ggml Threads::Threads) add_test(NAME ${TEST_TARGET} COMMAND $) set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") +# +# test-adreno-version +# Pure unit test for src/ggml-adreno.h (Adreno GPU-string parser used by the +# Android OpenCL/Vulkan backend-selection policy). Header-only: needs the src/ +# include path but does not link the ggml runtime. Always built. 
+ +set(TEST_TARGET test-adreno-version) +add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp) +target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src) +add_test(NAME ${TEST_TARGET} COMMAND $) +set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") + if (NOT GGML_BACKEND_DL) # diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp new file mode 100644 index 0000000000..40e8068075 --- /dev/null +++ b/tests/test-adreno-version.cpp @@ -0,0 +1,62 @@ +// Unit test for ggml_adreno_version_from_description() — the pure GPU-string +// parser behind the Android Adreno backend-selection policy in +// ggml-backend-reg.cpp (see src/ggml-adreno.h). +// +// Pure / header-only: does not link the ggml runtime or need a GPU, so it runs +// on every host/CI. The hardware-dependent half of the policy +// (ggml_backend_min_adreno_version + the OpenCL load decision) is exercised +// end-to-end on the device farm via transcription-whispercpp's GPU test. + +#include "ggml-adreno.h" + +#include +#include + +static int g_failures = 0; + +static void expect_version(const std::string & description, int expected) { + const int got = ggml_adreno_version_from_description(description); + if (got != expected) { + std::printf("FAIL: \"%s\" -> %d (expected %d)\n", description.c_str(), got, expected); + g_failures++; + } else { + std::printf("ok: \"%s\" -> %d\n", description.c_str(), got); + } +} + +int main() { + // Real Adreno descriptions (as reported via the Vulkan device name). + expect_version("Adreno (TM) 830", 830); // Samsung S25 (Snapdragon 8 Elite) + expect_version("Adreno (TM) 750", 750); // Snapdragon 8 Gen 3 + expect_version("Adreno (TM) 740", 740); + expect_version("Adreno (TM) 660", 660); + expect_version("Adreno 730", 730); // no "(TM)" variant + expect_version("Adreno(TM)619", 619); // no spaces + + // Case-insensitive (hardening over the raw substring check in fabric-llm). 
+ expect_version("ADRENO 830", 830); + expect_version("adreno 612", 612); + + // Non-Adreno GPUs -> -1 (not Adreno). These are the devices that must keep + // using Vulkan / Metal, never OpenCL. + expect_version("Mali-G715", -1); // Pixel 9 (proven to work on Vulkan) + expect_version("Mali-G78 MP14", -1); + expect_version("NVIDIA GeForce RTX 5090", -1); + expect_version("AMD Radeon (RADV RAPHAEL_MENDOCINO)", -1); + expect_version("Apple M2", -1); + expect_version("llvmpipe (LLVM 20.1.2, 256 bits)", -1); + expect_version("Intel(R) Arc(TM) A770 Graphics", -1); + expect_version("", -1); + + // "dreno" present but no parseable number -> -3 (treated as "no usable + // Adreno version" by the caller; distinct from "not Adreno"). + expect_version("Adreno (TM)", -3); + expect_version("Adreno", -3); + + if (g_failures == 0) { + std::printf("All Adreno-version parsing cases passed.\n"); + return 0; + } + std::printf("%d Adreno-version parsing case(s) failed.\n", g_failures); + return 1; +} From cbe030879e2b93c8dcc0572d2a7344a4b09702bd Mon Sep 17 00:00:00 2001 From: Zbigniew Herman Date: Tue, 2 Jun 2026 11:21:06 +0200 Subject: [PATCH 2/3] ggml-adreno: extract Adreno backend policy into a testable function (review) Code-review follow-up on the Android Adreno backend-selection change: - Factor the OpenCL/Vulkan decision (the 700/600 generation thresholds) out of ggml_backend_load_all_from_path() into ggml_adreno_resolve_backend_policy() in ggml-adreno.h, returning {load_opencl, unload_vulkan}. This makes the policy boundaries unit-testable without a GPU; the loader now just consumes the policy (behaviour unchanged, still mirrors qvac-fabric-llm.cpp). - Add 11 policy cases to test-adreno-version.cpp, pinning the subtle boundaries: Adreno exactly 700 -> CPU only (not > 700), exactly 600 -> OpenCL (not > 600), 701/730/750/830 -> OpenCL, 601..700 -> CPU only, <=600 -> OpenCL, <=0 -> none. 
- Document the parser's first-digit-run limitation (inherited from fabric-llm): "Adreno X1-85" would parse as 1; that naming is Snapdragon-X (Windows-on-ARM) only, not Android phones (5xx/6xx/7xx/8xx parse correctly). 29/29 test cases pass via ctest; ggml builds clean; ggml-backend-reg.cpp -D__ANDROID__ -fsyntax-only clean. Co-authored-by: Cursor --- src/ggml-adreno.h | 35 +++++++++++++++++++++++++++++++++++ src/ggml-backend-reg.cpp | 10 +++++----- tests/test-adreno-version.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h index 054490591a..2aa0265c2d 100644 --- a/src/ggml-adreno.h +++ b/src/ggml-adreno.h @@ -27,6 +27,11 @@ // The description is lowercased before matching so "Adreno"/"ADRENO"/"adreno" // are all recognised (a small hardening over the raw substring check in // qvac-fabric-llm.cpp; it never produces a false negative for Adreno). +// +// Limitation (inherited from qvac-fabric-llm.cpp): the first digit run wins, so +// a non-numeric model name like "Adreno X1-85" would parse as 1. That naming is +// Snapdragon-X (Windows-on-ARM) only; Android phone Adrenos are 5xx/6xx/7xx/8xx, +// which parse correctly. inline int ggml_adreno_version_from_description(const std::string & gpu_description) { std::string lowered = gpu_description; std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) { @@ -48,3 +53,33 @@ inline int ggml_adreno_version_from_description(const std::string & gpu_descript } return -3; } + +// Android OpenCL/Vulkan backend policy decision, factored out of +// ggml_backend_load_all_from_path() so the version thresholds are unit-testable +// without a GPU. Input is the smallest Adreno generation among the GPU devices +// (the value ggml_backend_min_adreno_version() returns: a positive generation, +// or <= 0 when no Adreno GPU is present). 
Mirrors qvac-fabric-llm.cpp's ggml +// fork: +// not Adreno (<= 0) -> no OpenCL; keep Vulkan/CPU +// Adreno > 700 -> load OpenCL (kept alongside Vulkan; the consumer picks +// OpenCL over Vulkan -- see transcription-whispercpp) +// Adreno 601..700 -> CPU only: unload Vulkan and don't load OpenCL (both GPU +// paths are unstable on this tier) +// Adreno 1..600 -> load OpenCL (kept alongside Vulkan), matching fabric +struct ggml_adreno_backend_policy { + bool load_opencl; + bool unload_vulkan; +}; + +inline ggml_adreno_backend_policy ggml_adreno_resolve_backend_policy(int min_adreno_version) { + if (min_adreno_version <= 0) { + return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ false }; + } + if (min_adreno_version > 700) { + return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false }; + } + if (min_adreno_version > 600) { + return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true }; + } + return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false }; +} diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp index 68f272b00b..4f6f858a19 100644 --- a/src/ggml-backend-reg.cpp +++ b/src/ggml-backend-reg.cpp @@ -748,19 +748,19 @@ void ggml_backend_load_all_from_path(const char * dir_path) { { ggml_backend_reg_t vulkan_backend = ggml_backend_reg_by_name("vulkan"); const int min_adreno_version = ggml_backend_min_adreno_version(vulkan_backend); + const ggml_adreno_backend_policy policy = ggml_adreno_resolve_backend_policy(min_adreno_version); + load_opencl = policy.load_opencl; if (min_adreno_version <= 0) { GGML_LOG_INFO("%s: no Adreno GPU detected (%d); skipping OpenCL, relying on Vulkan/CPU\n", __func__, min_adreno_version); - load_opencl = false; - } else if (min_adreno_version > 700) { - GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version); - } else if (min_adreno_version > 600) { + } else if (policy.unload_vulkan) { 
GGML_LOG_INFO("%s: Adreno %d detected; removing Vulkan and relying on CPU only\n", __func__, min_adreno_version); if (vulkan_backend != nullptr) { ggml_backend_unload(vulkan_backend); } - load_opencl = false; + } else if (policy.load_opencl) { + GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version); } } #endif // __ANDROID__ diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp index 40e8068075..225c4421e9 100644 --- a/tests/test-adreno-version.cpp +++ b/tests/test-adreno-version.cpp @@ -24,6 +24,18 @@ static void expect_version(const std::string & description, int expected) { } } +static void expect_policy(int min_adreno_version, bool load_opencl, bool unload_vulkan) { + const ggml_adreno_backend_policy got = ggml_adreno_resolve_backend_policy(min_adreno_version); + if (got.load_opencl != load_opencl || got.unload_vulkan != unload_vulkan) { + std::printf("FAIL: policy(%d) -> {load_opencl=%d, unload_vulkan=%d} (expected {%d, %d})\n", + min_adreno_version, got.load_opencl, got.unload_vulkan, load_opencl, unload_vulkan); + g_failures++; + } else { + std::printf("ok: policy(%d) -> {load_opencl=%d, unload_vulkan=%d}\n", + min_adreno_version, got.load_opencl, got.unload_vulkan); + } +} + int main() { // Real Adreno descriptions (as reported via the Vulkan device name). expect_version("Adreno (TM) 830", 830); // Samsung S25 (Snapdragon 8 Elite) @@ -53,6 +65,24 @@ int main() { expect_version("Adreno (TM)", -3); expect_version("Adreno", -3); + // Backend policy {load_opencl, unload_vulkan} per Adreno generation. + // Non-Adreno / no GPU -> no OpenCL, keep Vulkan/CPU. + expect_policy(-2, false, false); // null Vulkan backend + expect_policy(-1, false, false); // no Adreno GPU (e.g. Mali) + // Adreno 7xx / 8xx -> load OpenCL (Vulkan kept; consumer picks OpenCL). 
+ expect_policy(830, true, false); + expect_policy(750, true, false); + expect_policy(730, true, false); + expect_policy(701, true, false); + // Boundary: exactly 700 is NOT > 700 -> falls to the 601..700 tier (CPU only). + expect_policy(700, false, true); + // Adreno 601..700 -> CPU only (unload Vulkan, no OpenCL). + expect_policy(660, false, true); + expect_policy(601, false, true); + // Boundary: exactly 600 is NOT > 600 -> old-Adreno tier (load OpenCL). + expect_policy(600, true, false); + expect_policy(500, true, false); + if (g_failures == 0) { std::printf("All Adreno-version parsing cases passed.\n"); return 0; From d0d35b5b565c6936bb8d025ee18ddbd0d43460e7 Mon Sep 17 00:00:00 2001 From: Zbigniew Herman Date: Tue, 2 Jun 2026 12:42:16 +0200 Subject: [PATCH 3/3] ggml-adreno: treat Adreno <=600 the same as 601-700 (CPU only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review: only Adreno > 700 has a stable ggml GPU path, so collapse the Adreno <=600 tier into the 601..700 tier — all Adreno 1..700 now resolves to CPU only (unload Vulkan, no OpenCL). This is stricter than qvac-fabric-llm.cpp (which loaded OpenCL on Adreno <=600); older Adreno GPUs are no more capable than the 601..700 tier we already exclude, so there is no reason to expose a GPU backend on them. ggml_adreno_resolve_backend_policy() simplifies to two GPU branches (>700 -> OpenCL, 1..700 -> CPU only) plus the non-Adreno case. Test updated: Adreno 600/500/1 now expect {load_opencl=false, unload_vulkan=true}. 30/30 cases pass; ggml builds clean; -D__ANDROID__ -fsyntax-only clean. 
Co-authored-by: Cursor --- src/ggml-adreno.h | 20 +++++++++++--------- tests/test-adreno-version.cpp | 12 +++++++----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h index 2aa0265c2d..16c9d7407e 100644 --- a/src/ggml-adreno.h +++ b/src/ggml-adreno.h @@ -58,14 +58,18 @@ inline int ggml_adreno_version_from_description(const std::string & gpu_descript // ggml_backend_load_all_from_path() so the version thresholds are unit-testable // without a GPU. Input is the smallest Adreno generation among the GPU devices // (the value ggml_backend_min_adreno_version() returns: a positive generation, -// or <= 0 when no Adreno GPU is present). Mirrors qvac-fabric-llm.cpp's ggml -// fork: +// or <= 0 when no Adreno GPU is present). +// // not Adreno (<= 0) -> no OpenCL; keep Vulkan/CPU // Adreno > 700 -> load OpenCL (kept alongside Vulkan; the consumer picks // OpenCL over Vulkan -- see transcription-whispercpp) -// Adreno 601..700 -> CPU only: unload Vulkan and don't load OpenCL (both GPU -// paths are unstable on this tier) -// Adreno 1..600 -> load OpenCL (kept alongside Vulkan), matching fabric +// Adreno 1..700 -> CPU only: unload Vulkan and don't load OpenCL +// (only Adreno > 700 has a stable ggml GPU path) +// +// Note: this is stricter than qvac-fabric-llm.cpp, which loads OpenCL on +// Adreno <= 600. We treat Adreno <= 600 the same as 601..700 (CPU only): older +// Adreno GPUs are no more capable than the 601..700 tier we already exclude, so +// there is no reason to expose a GPU backend on them. 
struct ggml_adreno_backend_policy { bool load_opencl; bool unload_vulkan; @@ -78,8 +82,6 @@ inline ggml_adreno_backend_policy ggml_adreno_resolve_backend_policy(int min_adr if (min_adreno_version > 700) { return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false }; } - if (min_adreno_version > 600) { - return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true }; - } - return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false }; + // Adreno 1..700 (incl. <= 600): CPU only. + return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true }; } diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp index 225c4421e9..462b51c25d 100644 --- a/tests/test-adreno-version.cpp +++ b/tests/test-adreno-version.cpp @@ -74,14 +74,16 @@ int main() { expect_policy(750, true, false); expect_policy(730, true, false); expect_policy(701, true, false); - // Boundary: exactly 700 is NOT > 700 -> falls to the 601..700 tier (CPU only). + // Boundary: exactly 700 is NOT > 700 -> CPU-only tier. expect_policy(700, false, true); - // Adreno 601..700 -> CPU only (unload Vulkan, no OpenCL). + // Adreno 1..700 -> CPU only (unload Vulkan, no OpenCL). This now includes + // Adreno <= 600, which is treated the same as 601..700 (stricter than + // qvac-fabric-llm.cpp, which loaded OpenCL on <= 600). expect_policy(660, false, true); expect_policy(601, false, true); - // Boundary: exactly 600 is NOT > 600 -> old-Adreno tier (load OpenCL). - expect_policy(600, true, false); - expect_policy(500, true, false); + expect_policy(600, false, true); // <= 600 now CPU-only (was OpenCL) + expect_policy(500, false, true); // <= 600 now CPU-only (was OpenCL) + expect_policy(1, false, true); // smallest positive Adreno -> CPU only if (g_failures == 0) { std::printf("All Adreno-version parsing cases passed.\n");