From 3f9792f48cd022f2be9ea1dc792583fd8056571b Mon Sep 17 00:00:00 2001
From: Zbigniew Herman <zbigniew.herman@tether.io>
Date: Tue, 2 Jun 2026 10:58:26 +0200
Subject: [PATCH 1/3] ggml-backend-reg: Adreno-aware OpenCL backend selection
 on Android

Port the Android GPU backend-selection policy from qvac-fabric-llm.cpp's ggml
fork into the speech ggml fork, so the speech stack (whisper / parakeet / tts)
selects GPU backends the same way the LLM stack does.

Problem: ggml_backend_load_all_from_path() loaded Vulkan AND OpenCL
unconditionally. On Adreno, ggml registers both for the same GPU and Vulkan
loads first, so whisper.cpp's default gpu_device=0 lands on the Adreno Vulkan
driver, which SIGSEGVs in vkCmdBindPipeline during ggml compute (observed on
Samsung S25 Ultra, device-farm run 26646220900). On non-Adreno GPUs (e.g.
Mali) Vulkan is the correct, stable path and OpenCL must not be used.

Fix (mirrors fabric-llm): on Android, after loading Vulkan, use it to detect
the GPU and decide whether to load OpenCL:
  - not Adreno            -> skip OpenCL (Vulkan/CPU), e.g. Mali keeps Vulkan
  - Adreno > 700          -> load OpenCL (Adreno's stable ggml path)
  - Adreno 601-700        -> unload Vulkan, CPU only (both backends buggy)
  - Adreno <= 600         -> load OpenCL (unchanged fall-through)
Implemented via ggml_backend_reg_by_name("vulkan") +
ggml_backend_min_adreno_version() (using the public ggml_backend_reg_dev_*
/ ggml_backend_dev_description APIs) + ggml_backend_unload().

Off Android the behaviour is unchanged and byte-identical: `load_opencl`
stays true and OpenCL is loaded in exactly the same position as before (right
after virtgpu, before hexagon). The whole Adreno block is `#ifdef __ANDROID__`.

The pure GPU-string parser is extracted to a header (src/ggml-adreno.h,
inline, dependency-free) so it can be unit-tested without a GPU; it also
lowercases the description (a small hardening over fabric-llm's raw substring
check). New tests/test-adreno-version.cpp covers 18 cases (Adreno variants,
Mali/NVIDIA/AMD/Apple/Intel/llvmpipe non-Adreno, empty, and unparseable);
wired into ctest as test-adreno-version (always built, no ggml link).

Validation (x64-linux):
  - ggml lib builds clean (ggml-backend-reg.cpp.o)
  - ggml-backend-reg.cpp -D__ANDROID__ -fsyntax-only: clean (Android block compiles)
  - test-adreno-version: 18/18 pass via ctest
The Adreno runtime selection itself is validated end-to-end on the device
farm via transcription-whispercpp's GPU test (Samsung S25 -> OpenCL).

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 src/ggml-adreno.h             | 50 +++++++++++++++++++++++++++
 src/ggml-backend-reg.cpp      | 65 ++++++++++++++++++++++++++++++++++-
 tests/CMakeLists.txt          | 12 +++++++
 tests/test-adreno-version.cpp | 62 +++++++++++++++++++++++++++++++++
 4 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 src/ggml-adreno.h
 create mode 100644 tests/test-adreno-version.cpp
diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h
new file mode 100644
index 0000000000..054490591a
--- /dev/null
+++ b/src/ggml-adreno.h
@@ -0,0 +1,50 @@
+#pragma once
+
+// Adreno GPU detection helper, shared between the Android backend-selection
+// logic in ggml-backend-reg.cpp and its unit test (tests/test-adreno-version.cpp).
+//
+// Header-only (inline) and dependency-free so the unit test can exercise the
+// pure parsing logic without linking the ggml runtime or needing a GPU.
+//
+// Mirrors the Adreno-version policy used by qvac-fabric-llm.cpp's ggml fork so
+// the speech stack (whisper / parakeet / tts) selects GPU backends the same
+// way the LLM stack does: on Android, detect the GPU through Vulkan (present
+// on virtually every Android GPU) and only fall back to OpenCL for Adreno,
+// whose Vulkan compute path is unstable.
+
+#include <algorithm>
+#include <cctype>
+#include <regex>
+#include <string>
+
+// Parse the Adreno GPU generation from a ggml device description.
+//   e.g. "Adreno (TM) 830" -> 830, "Adreno 730" -> 730
+// Returns:
+//   > 0 : the parsed Adreno generation number
+//   -1  : the description is not an Adreno GPU
+//   -3  : the description is an Adreno GPU but the version failed to parse
+//
+// The description is lowercased before matching so "Adreno"/"ADRENO"/"adreno"
+// are all recognised (a small hardening over the raw substring check in
+// qvac-fabric-llm.cpp; it never produces a false negative for Adreno).
+inline int ggml_adreno_version_from_description(const std::string & gpu_description) {
+    std::string lowered = gpu_description;
+    std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) {
+        return static_cast<char>(std::tolower(c));
+    });
+
+    if (lowered.find("dreno") == std::string::npos) {
+        return -1;
+    }
+
+    static const std::regex digits_regex(R"((\d+))");
+    std::smatch matches;
+    if (std::regex_search(lowered, matches, digits_regex) && matches.size() > 1) {
+        try {
+            return std::stoi(matches[1].str());
+        } catch (const std::exception &) {
+            return -3;
+        }
+    }
+    return -3;
+}
diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp
index 5ac3549dfc..68f272b00b 100644
--- a/src/ggml-backend-reg.cpp
+++ b/src/ggml-backend-reg.cpp
@@ -2,9 +2,11 @@
 #include "ggml-backend.h"
 #include "ggml-backend-dl.h"
 #include "ggml-impl.h"
+#include "ggml-adreno.h"
 #include <algorithm>
 #include <cstring>
 #include <filesystem>
+#include <limits>
 #include <memory>
 #include <string>
 #include <type_traits>
@@ -687,6 +689,35 @@ void ggml_backend_load_all() {
     ggml_backend_load_all_from_path(nullptr);
 }
 
+#ifdef __ANDROID__
+namespace {
+// Smallest Adreno generation among the GPU devices a (Vulkan) backend exposes,
+// or a negative sentinel: -2 if `reg` is null, -1 if no device parses to an Adreno version.
+// Vulkan is used as the probe because it is present on virtually every Android
+// GPU, so the GPU can be identified before deciding whether to load OpenCL.
+// Mirrors qvac-fabric-llm.cpp's ggml fork (the LLM stack's backend selection).
+int ggml_backend_min_adreno_version(ggml_backend_reg_t reg) {
+    if (reg == nullptr) {
+        return -2;
+    }
+    int min_found = std::numeric_limits<int>::max();
+    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
+        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
+        if (dev == nullptr) {
+            continue;
+        }
+        const char * description = ggml_backend_dev_description(dev);
+        GGML_LOG_INFO("%s: found device description: %s\n", __func__, description ? description : "(null)");
+        const int dev_adreno_version = ggml_adreno_version_from_description(description ? description : "");
+        if (dev_adreno_version > 0) {
+            min_found = std::min(min_found, dev_adreno_version);
+        }
+    }
+    return (min_found < std::numeric_limits<int>::max()) ? min_found : -1;
+}
+} // namespace
+#endif // __ANDROID__
+
 void ggml_backend_load_all_from_path(const char * dir_path) {
 #ifdef NDEBUG
     bool silent = true;
@@ -704,7 +735,39 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     ggml_backend_load_best("sycl", silent, dir_path);
     ggml_backend_load_best("vulkan", silent, dir_path);
     ggml_backend_load_best("virtgpu", silent, dir_path);
-    ggml_backend_load_best("opencl", silent, dir_path);
+
+    // OpenCL is only useful (and stable) for ggml on Adreno GPUs; on every
+    // other GPU the Adreno-tuned OpenCL kernels are either unsupported or buggy.
+    // On Android, use the already-loaded Vulkan backend to detect the GPU and
+    // only keep OpenCL for an Adreno that benefits from it. This mirrors
+    // qvac-fabric-llm.cpp's ggml fork so the speech stack selects backends the
+    // same way the LLM stack does. Off Android behaviour is unchanged: OpenCL is
+    // loaded unconditionally here. On Android with no Vulkan backend it is skipped.
+    bool load_opencl = true;
+#ifdef __ANDROID__
+    {
+        ggml_backend_reg_t vulkan_backend = ggml_backend_reg_by_name("vulkan");
+        const int min_adreno_version = ggml_backend_min_adreno_version(vulkan_backend);
+        if (min_adreno_version <= 0) {
+            GGML_LOG_INFO("%s: no Adreno GPU detected (%d); skipping OpenCL, relying on Vulkan/CPU\n",
+                          __func__, min_adreno_version);
+            load_opencl = false;
+        } else if (min_adreno_version > 700) {
+            GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version);
+        } else if (min_adreno_version > 600) {
+            GGML_LOG_INFO("%s: Adreno %d detected; removing Vulkan and relying on CPU only\n",
+                          __func__, min_adreno_version);
+            if (vulkan_backend != nullptr) {
+                ggml_backend_unload(vulkan_backend);
+            }
+            load_opencl = false;
+        }
+    }
+#endif // __ANDROID__
+    if (load_opencl) {
+        ggml_backend_load_best("opencl", silent, dir_path);
+    }
+
     ggml_backend_load_best("hexagon", silent, dir_path);
     ggml_backend_load_best("musa", silent, dir_path);
     ggml_backend_load_best("openvino", silent, dir_path);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7c28e344c5..cbaf33c57c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -168,6 +168,18 @@ target_link_libraries(${TEST_TARGET} PRIVATE ggml Threads::Threads)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
 set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
+#
+# test-adreno-version
+# Pure unit test for src/ggml-adreno.h (Adreno GPU-string parser used by the
+# Android OpenCL/Vulkan backend-selection policy). Header-only: needs the src/
+# include path but does not link the ggml runtime. Always built.
+
+set(TEST_TARGET test-adreno-version)
+add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
+add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
 
 if (NOT GGML_BACKEND_DL)
     #
diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp
new file mode 100644
index 0000000000..40e8068075
--- /dev/null
+++ b/tests/test-adreno-version.cpp
@@ -0,0 +1,62 @@
+// Unit test for ggml_adreno_version_from_description() — the pure GPU-string
+// parser behind the Android Adreno backend-selection policy in
+// ggml-backend-reg.cpp (see src/ggml-adreno.h).
+//
+// Pure / header-only: does not link the ggml runtime or need a GPU, so it runs
+// on every host/CI. The hardware-dependent half of the policy
+// (ggml_backend_min_adreno_version + the OpenCL load decision) is exercised
+// end-to-end on the device farm via transcription-whispercpp's GPU test.
+
+#include "ggml-adreno.h"
+
+#include <cstdio>
+#include <string>
+
+static int g_failures = 0;
+
+static void expect_version(const std::string & description, int expected) {
+    const int got = ggml_adreno_version_from_description(description);
+    if (got != expected) {
+        std::printf("FAIL: \"%s\" -> %d (expected %d)\n", description.c_str(), got, expected);
+        g_failures++;
+    } else {
+        std::printf("ok:   \"%s\" -> %d\n", description.c_str(), got);
+    }
+}
+
+int main() {
+    // Real Adreno descriptions (as reported via the Vulkan device name).
+    expect_version("Adreno (TM) 830", 830);   // Samsung S25 (Snapdragon 8 Elite)
+    expect_version("Adreno (TM) 750", 750);   // Snapdragon 8 Gen 3
+    expect_version("Adreno (TM) 740", 740);
+    expect_version("Adreno (TM) 660", 660);
+    expect_version("Adreno 730", 730);        // no "(TM)" variant
+    expect_version("Adreno(TM)619", 619);     // no spaces
+
+    // Case-insensitive (hardening over the raw substring check in fabric-llm).
+    expect_version("ADRENO 830", 830);
+    expect_version("adreno 612", 612);
+
+    // Non-Adreno GPUs -> -1 (not Adreno). These are the devices that must keep
+    // using Vulkan / Metal, never OpenCL.
+    expect_version("Mali-G715", -1);          // Pixel 9 (proven to work on Vulkan)
+    expect_version("Mali-G78 MP14", -1);
+    expect_version("NVIDIA GeForce RTX 5090", -1);
+    expect_version("AMD Radeon (RADV RAPHAEL_MENDOCINO)", -1);
+    expect_version("Apple M2", -1);
+    expect_version("llvmpipe (LLVM 20.1.2, 256 bits)", -1);
+    expect_version("Intel(R) Arc(TM) A770 Graphics", -1);
+    expect_version("", -1);
+
+    // "dreno" present but no parseable number -> -3 (treated as "no usable
+    // Adreno version" by the caller; distinct from "not Adreno").
+    expect_version("Adreno (TM)", -3);
+    expect_version("Adreno", -3);
+
+    if (g_failures == 0) {
+        std::printf("All Adreno-version parsing cases passed.\n");
+        return 0;
+    }
+    std::printf("%d Adreno-version parsing case(s) failed.\n", g_failures);
+    return 1;
+}

From cbe030879e2b93c8dcc0572d2a7344a4b09702bd Mon Sep 17 00:00:00 2001
From: Zbigniew Herman <zbigniew.herman@tether.io>
Date: Tue, 2 Jun 2026 11:21:06 +0200
Subject: [PATCH 2/3] ggml-adreno: extract Adreno backend policy into a
 testable function (review)

Code-review follow-up on the Android Adreno backend-selection change:

- Factor the OpenCL/Vulkan decision (the 700/600 generation thresholds) out of
  ggml_backend_load_all_from_path() into ggml_adreno_resolve_backend_policy()
  in ggml-adreno.h, returning {load_opencl, unload_vulkan}. This makes the
  policy boundaries unit-testable without a GPU; the loader now just consumes
  the policy (behaviour unchanged, still mirrors qvac-fabric-llm.cpp).
- Add 11 policy cases to test-adreno-version.cpp, pinning the subtle boundaries:
  Adreno exactly 700 -> CPU only (not > 700), exactly 600 -> OpenCL (not > 600),
  701/730/750/830 -> OpenCL, 601..700 -> CPU only, <=600 -> OpenCL, <=0 -> none.
- Document the parser's first-digit-run limitation (inherited from fabric-llm):
  "Adreno X1-85" would parse as 1; that naming is Snapdragon-X (Windows-on-ARM)
  only, not Android phones (5xx/6xx/7xx/8xx parse correctly).

29/29 test cases pass via ctest; ggml builds clean; ggml-backend-reg.cpp
-D__ANDROID__ -fsyntax-only clean.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 src/ggml-adreno.h             | 35 +++++++++++++++++++++++++++++++++++
 src/ggml-backend-reg.cpp      | 10 +++++-----
 tests/test-adreno-version.cpp | 30 ++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h
index 054490591a..2aa0265c2d 100644
--- a/src/ggml-adreno.h
+++ b/src/ggml-adreno.h
@@ -27,6 +27,11 @@
 // The description is lowercased before matching so "Adreno"/"ADRENO"/"adreno"
 // are all recognised (a small hardening over the raw substring check in
 // qvac-fabric-llm.cpp; it never produces a false negative for Adreno).
+//
+// Limitation (inherited from qvac-fabric-llm.cpp): the first digit run wins, so
+// a non-numeric model name like "Adreno X1-85" would parse as 1. That naming is
+// Snapdragon-X (Windows-on-ARM) only; Android phone Adrenos are 5xx/6xx/7xx/8xx,
+// which parse correctly.
 inline int ggml_adreno_version_from_description(const std::string & gpu_description) {
     std::string lowered = gpu_description;
     std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) {
@@ -48,3 +53,33 @@ inline int ggml_adreno_version_from_description(const std::string & gpu_descript
     }
     return -3;
 }
+
+// Android OpenCL/Vulkan backend policy decision, factored out of
+// ggml_backend_load_all_from_path() so the version thresholds are unit-testable
+// without a GPU. Input is the smallest Adreno generation among the GPU devices
+// (the value ggml_backend_min_adreno_version() returns: a positive generation,
+// or <= 0 when no Adreno GPU is present). Mirrors qvac-fabric-llm.cpp's ggml
+// fork:
+//   not Adreno (<= 0) -> no OpenCL; keep Vulkan/CPU
+//   Adreno > 700      -> load OpenCL (kept alongside Vulkan; the consumer picks
+//                        OpenCL over Vulkan -- see transcription-whispercpp)
+//   Adreno 601..700   -> CPU only: unload Vulkan and don't load OpenCL (both GPU
+//                        paths are unstable on this tier)
+//   Adreno 1..600     -> load OpenCL (kept alongside Vulkan), matching fabric
+struct ggml_adreno_backend_policy {
+    bool load_opencl;
+    bool unload_vulkan;
+};
+
+inline ggml_adreno_backend_policy ggml_adreno_resolve_backend_policy(int min_adreno_version) {
+    if (min_adreno_version <= 0) {
+        return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ false };
+    }
+    if (min_adreno_version > 700) {
+        return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false };
+    }
+    if (min_adreno_version > 600) {
+        return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true };
+    }
+    return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false };
+}
diff --git a/src/ggml-backend-reg.cpp b/src/ggml-backend-reg.cpp
index 68f272b00b..4f6f858a19 100644
--- a/src/ggml-backend-reg.cpp
+++ b/src/ggml-backend-reg.cpp
@@ -748,19 +748,19 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     {
         ggml_backend_reg_t vulkan_backend = ggml_backend_reg_by_name("vulkan");
         const int min_adreno_version = ggml_backend_min_adreno_version(vulkan_backend);
+        const ggml_adreno_backend_policy policy = ggml_adreno_resolve_backend_policy(min_adreno_version);
+        load_opencl = policy.load_opencl;
         if (min_adreno_version <= 0) {
             GGML_LOG_INFO("%s: no Adreno GPU detected (%d); skipping OpenCL, relying on Vulkan/CPU\n",
                           __func__, min_adreno_version);
-            load_opencl = false;
-        } else if (min_adreno_version > 700) {
-            GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version);
-        } else if (min_adreno_version > 600) {
+        } else if (policy.unload_vulkan) {
             GGML_LOG_INFO("%s: Adreno %d detected; removing Vulkan and relying on CPU only\n",
                           __func__, min_adreno_version);
             if (vulkan_backend != nullptr) {
                 ggml_backend_unload(vulkan_backend);
             }
-            load_opencl = false;
+        } else if (policy.load_opencl) {
+            GGML_LOG_INFO("%s: Adreno %d detected; keeping OpenCL backend\n", __func__, min_adreno_version);
         }
     }
 #endif // __ANDROID__
diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp
index 40e8068075..225c4421e9 100644
--- a/tests/test-adreno-version.cpp
+++ b/tests/test-adreno-version.cpp
@@ -24,6 +24,18 @@ static void expect_version(const std::string & description, int expected) {
     }
 }
 
+static void expect_policy(int min_adreno_version, bool load_opencl, bool unload_vulkan) {
+    const ggml_adreno_backend_policy got = ggml_adreno_resolve_backend_policy(min_adreno_version);
+    if (got.load_opencl != load_opencl || got.unload_vulkan != unload_vulkan) {
+        std::printf("FAIL: policy(%d) -> {load_opencl=%d, unload_vulkan=%d} (expected {%d, %d})\n",
+                    min_adreno_version, got.load_opencl, got.unload_vulkan, load_opencl, unload_vulkan);
+        g_failures++;
+    } else {
+        std::printf("ok:   policy(%d) -> {load_opencl=%d, unload_vulkan=%d}\n",
+                    min_adreno_version, got.load_opencl, got.unload_vulkan);
+    }
+}
+
 int main() {
     // Real Adreno descriptions (as reported via the Vulkan device name).
     expect_version("Adreno (TM) 830", 830);   // Samsung S25 (Snapdragon 8 Elite)
@@ -53,6 +65,24 @@ int main() {
     expect_version("Adreno (TM)", -3);
     expect_version("Adreno", -3);
 
+    // Backend policy {load_opencl, unload_vulkan} per Adreno generation.
+    // Non-Adreno / no GPU -> no OpenCL, keep Vulkan/CPU.
+    expect_policy(-2, false, false);   // null Vulkan backend
+    expect_policy(-1, false, false);   // no Adreno GPU (e.g. Mali)
+    // Adreno 7xx / 8xx -> load OpenCL (Vulkan kept; consumer picks OpenCL).
+    expect_policy(830, true, false);
+    expect_policy(750, true, false);
+    expect_policy(730, true, false);
+    expect_policy(701, true, false);
+    // Boundary: exactly 700 is NOT > 700 -> falls to the 601..700 tier (CPU only).
+    expect_policy(700, false, true);
+    // Adreno 601..700 -> CPU only (unload Vulkan, no OpenCL).
+    expect_policy(660, false, true);
+    expect_policy(601, false, true);
+    // Boundary: exactly 600 is NOT > 600 -> old-Adreno tier (load OpenCL).
+    expect_policy(600, true, false);
+    expect_policy(500, true, false);
+
     if (g_failures == 0) {
         std::printf("All Adreno-version parsing cases passed.\n");
         return 0;

From d0d35b5b565c6936bb8d025ee18ddbd0d43460e7 Mon Sep 17 00:00:00 2001
From: Zbigniew Herman <zbigniew.herman@tether.io>
Date: Tue, 2 Jun 2026 12:42:16 +0200
Subject: [PATCH 3/3] ggml-adreno: treat Adreno <=600 the same as 601-700 (CPU
 only)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per review: only Adreno > 700 has a stable ggml GPU path, so collapse the
Adreno <=600 tier into the 601..700 tier — all Adreno 1..700 now resolves to
CPU only (unload Vulkan, no OpenCL). This is stricter than qvac-fabric-llm.cpp
(which loaded OpenCL on Adreno <=600); older Adreno GPUs are no more capable
than the 601..700 tier we already exclude, so there is no reason to expose a
GPU backend on them.

ggml_adreno_resolve_backend_policy() simplifies to two GPU branches (>700 ->
OpenCL, 1..700 -> CPU only) plus the non-Adreno case. Test updated: Adreno
600/500/1 now expect {load_opencl=false, unload_vulkan=true}.

30/30 cases pass; ggml builds clean; -D__ANDROID__ -fsyntax-only clean.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 src/ggml-adreno.h             | 20 +++++++++++---------
 tests/test-adreno-version.cpp | 12 +++++++-----
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/ggml-adreno.h b/src/ggml-adreno.h
index 2aa0265c2d..16c9d7407e 100644
--- a/src/ggml-adreno.h
+++ b/src/ggml-adreno.h
@@ -58,14 +58,18 @@ inline int ggml_adreno_version_from_description(const std::string & gpu_descript
 // ggml_backend_load_all_from_path() so the version thresholds are unit-testable
 // without a GPU. Input is the smallest Adreno generation among the GPU devices
 // (the value ggml_backend_min_adreno_version() returns: a positive generation,
-// or <= 0 when no Adreno GPU is present). Mirrors qvac-fabric-llm.cpp's ggml
-// fork:
+// or <= 0 when no Adreno GPU is present).
+//
 //   not Adreno (<= 0) -> no OpenCL; keep Vulkan/CPU
 //   Adreno > 700      -> load OpenCL (kept alongside Vulkan; the consumer picks
 //                        OpenCL over Vulkan -- see transcription-whispercpp)
-//   Adreno 601..700   -> CPU only: unload Vulkan and don't load OpenCL (both GPU
-//                        paths are unstable on this tier)
-//   Adreno 1..600     -> load OpenCL (kept alongside Vulkan), matching fabric
+//   Adreno 1..700     -> CPU only: unload Vulkan and don't load OpenCL
+//                        (only Adreno > 700 has a stable ggml GPU path)
+//
+// Note: this is stricter than qvac-fabric-llm.cpp, which loads OpenCL on
+// Adreno <= 600. We treat Adreno <= 600 the same as 601..700 (CPU only): older
+// Adreno GPUs are no more capable than the 601..700 tier we already exclude, so
+// there is no reason to expose a GPU backend on them.
 struct ggml_adreno_backend_policy {
     bool load_opencl;
     bool unload_vulkan;
@@ -78,8 +82,6 @@ inline ggml_adreno_backend_policy ggml_adreno_resolve_backend_policy(int min_adr
     if (min_adreno_version > 700) {
         return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false };
     }
-    if (min_adreno_version > 600) {
-        return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true };
-    }
-    return ggml_adreno_backend_policy{ /*load_opencl=*/ true, /*unload_vulkan=*/ false };
+    // Adreno 1..700 (incl. <= 600): CPU only.
+    return ggml_adreno_backend_policy{ /*load_opencl=*/ false, /*unload_vulkan=*/ true };
 }
diff --git a/tests/test-adreno-version.cpp b/tests/test-adreno-version.cpp
index 225c4421e9..462b51c25d 100644
--- a/tests/test-adreno-version.cpp
+++ b/tests/test-adreno-version.cpp
@@ -74,14 +74,16 @@ int main() {
     expect_policy(750, true, false);
     expect_policy(730, true, false);
     expect_policy(701, true, false);
-    // Boundary: exactly 700 is NOT > 700 -> falls to the 601..700 tier (CPU only).
+    // Boundary: exactly 700 is NOT > 700 -> CPU-only tier.
     expect_policy(700, false, true);
-    // Adreno 601..700 -> CPU only (unload Vulkan, no OpenCL).
+    // Adreno 1..700 -> CPU only (unload Vulkan, no OpenCL). This now includes
+    // Adreno <= 600, which is treated the same as 601..700 (stricter than
+    // qvac-fabric-llm.cpp, which loaded OpenCL on <= 600).
     expect_policy(660, false, true);
     expect_policy(601, false, true);
-    // Boundary: exactly 600 is NOT > 600 -> old-Adreno tier (load OpenCL).
-    expect_policy(600, true, false);
-    expect_policy(500, true, false);
+    expect_policy(600, false, true);   // <= 600 now CPU-only (was OpenCL)
+    expect_policy(500, false, true);   // <= 600 now CPU-only (was OpenCL)
+    expect_policy(1, false, true);     // smallest positive Adreno -> CPU only
 
     if (g_failures == 0) {
         std::printf("All Adreno-version parsing cases passed.\n");