tetherto · pratiknarola-t · May 29, 2026
@@ -212,24 +212,38 @@ void ensure_backends_loaded() {
 // reach the same decision on the same hardware.
 int parse_adreno_version(const char * s) {
     if (!s) return -1;
-    const char * p = std::strstr(s, "Adreno");
-    if (!p) p = std::strstr(s, "adreno");
-    if (!p) return -1;
-    p += 6; // strlen("Adreno") == strlen("adreno") == 6
-    while (*p && !(*p >= '0' && *p <= '9') && *p != 'X' && *p != 'x') ++p;
-    if (!*p) return -1;
-    if (*p == 'X' || *p == 'x') {
-        ++p;
-        if (*p < '0' || *p > '9') return -1; // "Xclipse" etc. is not Adreno-X
-        return 800;
-    }
-    int v = 0;
-    while (*p >= '0' && *p <= '9') {
-        v = v * 10 + (*p - '0');
-        ++p;
-        if (v > 100000) return -1;
+    // Scan EVERY "Adreno"/"adreno" marker and return the largest candidate
+    // found (-1 if none): a number >= 100, or 800 for an "X<digit>" model.
+    // Some OpenCL device strings put the API version before the model, e.g.
+    // "QUALCOMM Adreno(TM) (OpenCL 3.0 Adreno(TM) 740)": parsing only the
+    // first marker yields 3 (from "OpenCL 3.0") and mis-tiers the GPU below
+    // Vulkan; the second "Adreno(TM) 740" marker recovers the real version.
+    int best = -1;
+    for (const char * p = s; *p; ++p) {
+        if (std::strncmp(p, "Adreno", 6) != 0 &&
+            std::strncmp(p, "adreno", 6) != 0) {
+            continue;
+        }
+        const char * q = p + 6; // step past the 6-char marker ("Adreno" / "adreno")
+        while (*q && !(*q >= '0' && *q <= '9') && *q != 'X' && *q != 'x') ++q;
+        if (!*q) continue;
+        if (*q == 'X' || *q == 'x') {
+            if (*(q + 1) >= '0' && *(q + 1) <= '9') { // "Adreno X1-..." family is reported as 800
+                if (800 > best) best = 800;
+            }
+            continue; // either way, next marker; "Xclipse" etc. is not Adreno-X
+        }
+        int v = 0;
+        bool overflow = false;
+        while (*q >= '0' && *q <= '9') {
+            v = v * 10 + (*q - '0');
+            ++q;
+            if (v > 100000) { overflow = true; break; }
+        }
+        // Values below 100 (e.g. the 3 in "OpenCL 3.0") or above 100000 are not models.
+        if (!overflow && v >= 100 && v > best) best = v;
     }
-    return v;
+    return best;
 }
 
 bool is_adreno_6xx(const char * s) {
@@ -242,14 +256,48 @@ bool is_adreno_700plus(const char * s) {
     return v >= 700;
 }
 
+// True if `name` or `desc` contains "adreno" or "qualcomm" (either string
+// may be null; a null string never matches). Unlike parse_adreno_version,
+// which returns -1 for the bare OpenCL "QUALCOMM Adreno(TM)" string (no
+// model number), this is a vendor check used to gate Android GPU selection.
+// Matching is ASCII case-insensitive because capitalisation varies:
+// ggml-opencl reports CL_DEVICE_NAME ("QUALCOMM Adreno(TM)") and
+// ggml-vulkan reports the Vulkan deviceName ("Adreno (TM) 740").
+bool is_qualcomm_adreno(const char * name, const char * desc) {
+    auto contains_ci = [](const char * hay, const char * needle) -> bool {
+        if (!hay || !needle) return false;
+        for (const char * h = hay; *h; ++h) {
+            const char * a = h;
+            const char * b = needle;
+            while (*a && *b) {
+                const char ca = (*a >= 'A' && *a <= 'Z') ? char(*a + 32) : *a;
+                const char cb = (*b >= 'A' && *b <= 'Z') ? char(*b + 32) : *b;
+                if (ca != cb) break;
+                ++a;
+                ++b;
+            }
+            if (!*b) return true;
+        }
+        return false;
+    };
+    return contains_ci(name, "adreno")   || contains_ci(desc, "adreno") ||
+           contains_ci(name, "qualcomm") || contains_ci(desc, "qualcomm");
+}
+
 // Pick a GPU backend using the same tier policy as parakeet-cpp's
 // `init_gpu_backend` / llm-llamacpp's BackendSelection: ggml-opencl
 // is only used when an Adreno 700+ device is present (where its
 // kernels are validated and faster than Vulkan); every other GPU
-// (Vulkan, Metal, CUDA, Mali, Intel iGPU, ...) goes through the
-// non-OpenCL preference. Adreno 6xx OpenCL is known broken
-// (incorrect outputs) and is force-skipped unless the caller opts
-// in via `TTS_CPP_ALLOW_ADRENO_6XX=1`.
+// (Vulkan, Metal, CUDA, Intel iGPU, ...) goes through the non-OpenCL
+// preference. Adreno 6xx OpenCL is known broken (incorrect outputs)
+// and is force-skipped unless the caller opts in via
+// `TTS_CPP_ALLOW_ADRENO_6XX=1`.
+//
+// On Android the device walk is additionally gated to Qualcomm Adreno
+// only: other Android GPU vendors are not validated and at least one
+// (ARM Mali / Tensor) aborts the host process from inside graph
+// compute, so they are skipped and the engine falls back to CPU.
+// Desktop GPU vendors are unaffected.
 //
 // Routed exclusively through the ggml-backend registry
 // (`ggml_backend_load_all` + `ggml_backend_dev_*`). No direct calls
@@ -292,6 +340,29 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
         const char * reg_name = dev_reg_name(dev);
         const bool   is_opencl = reg_name && std::strcmp(reg_name, "OpenCL") == 0;
 
+#if defined(__ANDROID__)
+        // Android GPU allowlist: only Qualcomm Adreno is validated for the
+        // tts-cpp GPU backends (OpenCL on Adreno 700+, Vulkan as the
+        // bring-up fallback). Other Android GPU vendors are not validated,
+        // and at least one (ARM Mali / Tensor) aborts the whole host
+        // process from inside ggml_backend_graph_compute via GGML_ASSERT ->
+        // ggml_abort(), which cannot be caught from C++. Skip non-Adreno
+        // devices so the policy falls through to CPU instead of risking a
+        // fatal abort on an unvalidated driver.
+        if (!is_qualcomm_adreno(name, desc)) {
+            if (verbose) {
+                fprintf(stderr,
+                    "%s: Android GPU '%s' (%s) is not Qualcomm Adreno; "
+                    "skipping (only Adreno is validated on Android; "
+                    "falling through to CPU)\n",
+                    log_prefix,
+                    name ? name : "?",
+                    desc ? desc : "?");
+            }
+            continue;
+        }
+#endif
+
         const int adreno_v = std::max(parse_adreno_version(name),
                                       parse_adreno_version(desc));
         if (adreno_v > max_adreno_version) max_adreno_version = adreno_v;
@@ -331,10 +402,11 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
     //   1. Adreno 700+: prefer OpenCL (validated, faster than Vulkan
     //      on Snapdragon 8 Gen 2/3/4 etc.).
     //   2. Anything else with a non-OpenCL GPU: prefer that
-    //      (Vulkan on all non-Adreno Android, Metal on Apple, CUDA
-    //      on Linux/Windows desktop, Mali iGPU via Vulkan, ...).
-    //   3. Last resort: any other OpenCL device (e.g. desktop OpenCL
-    //      or non-Adreno mobile when no Vulkan is registered).
+    //      (Adreno Vulkan on Android — non-Adreno is filtered out
+    //      above; Metal on Apple; CUDA / Vulkan on Linux/Windows
+    //      desktop).
+    //   3. Last resort: any other OpenCL device (e.g. desktop OpenCL,
+    //      or Adreno OpenCL whose version string lacked a model number).
     auto try_init = [&](const std::vector<Cand> & bucket) -> ggml_backend_t {
         for (const Cand & c : bucket) {
             ggml_backend_t b = ggml_backend_dev_init(c.dev, nullptr);

@@ -87,4 +87,10 @@ int parse_adreno_version(const char * s);
 bool is_adreno_6xx(const char * s);
 bool is_adreno_700plus(const char * s);
 
+// Vendor check: true when `name` OR `desc` contains "adreno" or "qualcomm"
+// (ASCII case-insensitive; null strings are tolerated). Unlike
+// parse_adreno_version it needs no model number, so it also matches the bare
+// OpenCL "QUALCOMM Adreno(TM)" string. Gates Android GPU selection.
+bool is_qualcomm_adreno(const char * name, const char * desc);
+
 } // namespace tts_cpp::detail
@@ -320,6 +320,7 @@ struct cli_params {
     std::string tokens_file;     // optional pre-tokenized speech tokens (skips T3)
     std::string text;            // input text for T3
     std::string output;          // legacy: speech-tokens output file (if set, write tokens)
+    std::string dump_mel_path;   // optional: dump S3Gen intermediates (_mu/_step0_dxdt/mel) to .npy for debugging
     // S3Gen + HiFT vocoder:
     std::string s3gen_gguf;      // enables full text → wav pipeline
     std::string out_wav;         // wav output path (requires --s3gen-gguf)
@@ -450,6 +451,7 @@ static void print_usage(const char * argv0) {
     fprintf(stderr, "                          With --s3gen-gguf this is interpreted as *speech* tokens\n");
     fprintf(stderr, "                          and the T3 step is skipped.\n");
     fprintf(stderr, "  --output PATH           Write generated speech tokens to PATH (text mode).\n");
+    fprintf(stderr, "  --dump-mel-path PATH    Debug: dump S3Gen mel to PATH, encoder to PATH_mu.npy, CFM step0 to PATH_step0_dxdt.npy.\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "  --s3gen-gguf PATH       Enables the full text -> wav pipeline (S3Gen + HiFT).\n");
     fprintf(stderr, "  --out PATH              Output wav file when --s3gen-gguf is set.\n");
@@ -590,6 +592,7 @@ static bool parse_args(int argc, char ** argv, cli_params & params) {
         else if (arg == "--text")           { auto v = next("--text");           if (!v) return false; params.text = v; }
         else if (arg == "--tokens-file")    { auto v = next("--tokens-file");    if (!v) return false; params.tokens_file = v; }
         else if (arg == "--output")         { auto v = next("--output");         if (!v) return false; params.output = v; }
+        else if (arg == "--dump-mel-path")  { auto v = next("--dump-mel-path");   if (!v) return false; params.dump_mel_path = v; }
         else if (arg == "--s3gen-gguf")     { auto v = next("--s3gen-gguf");     if (!v) return false; params.s3gen_gguf = v; }
         else if (arg == "--out")            { auto v = next("--out");            if (!v) return false; params.out_wav = v; }
         else if (arg == "--ref-dir")        { auto v = next("--ref-dir");        if (!v) return false; params.ref_dir = v; }
@@ -982,6 +985,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
             opts.verbose         = params.verbose;
             opts.n_gpu_layers    = params.n_gpu_layers;
             opts.cfm_steps       = params.cfm_steps;
+            opts.dump_mel_path   = params.dump_mel_path;
             opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
             if (!params.reference_audio.empty()) {
                 if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
@@ -1265,6 +1269,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
             // chunk; --cfm-steps falls in as the per-chunk default below
             // (`stream_cfm_steps > 0 ? stream_cfm_steps : cfm_steps`).
             opts.cfm_steps       = params.cfm_steps;
+            opts.dump_mel_path   = params.dump_mel_path;
             opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
             if (!params.reference_audio.empty()) {
                 if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
@@ -2063,6 +2068,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
             // Streaming chunks honour --stream-cfm-steps with --cfm-steps as
             // fallback when copts is set up further below.
             opts.cfm_steps       = params.cfm_steps;
+            opts.dump_mel_path   = params.dump_mel_path;
             opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
             if (!params.reference_audio.empty()) {
                 if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,