Skip to content
122 changes: 97 additions & 25 deletions tts-cpp/src/backend_selection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,24 +212,38 @@ void ensure_backends_loaded() {
// reach the same decision on the same hardware.
int parse_adreno_version(const char * s) {
    // Returns the largest plausible Adreno model number named in `s`, or -1
    // when `s` is null or no marker is followed by a usable model.
    //   - "Adreno"/"adreno" followed (after any non-digit, non-X text) by a
    //     decimal number >= 100 yields that number.
    //   - "Adreno ... X<digit>" (the "Adreno X1-..." family) counts as 800.
    //   - Numbers below 100 and digit runs that exceed 100000 are ignored.
    if (!s) return -1;
    // Scan EVERY "Adreno"/"adreno" marker and keep the largest plausible
    // (>= 100, i.e. real 3-digit model) version found. Some OpenCL device
    // strings embed the API version before the model number, e.g.
    // "QUALCOMM Adreno(TM) (OpenCL 3.0 Adreno(TM) 740)": parsing only the
    // first marker yields 3 (from "OpenCL 3.0") and mis-tiers the GPU below
    // Vulkan; the second "Adreno 740" marker recovers the real version.
    int best = -1;
    for (const char * p = s; *p; ++p) {
        // strncmp stops at the terminating NUL, so probing 6 bytes near the
        // end of the string never reads past it.
        if (std::strncmp(p, "Adreno", 6) != 0 &&
            std::strncmp(p, "adreno", 6) != 0) {
            continue;
        }
        const char * q = p + 6;  // strlen("Adreno") == strlen("adreno") == 6
        // Skip decoration such as "(TM) " until a digit or an X/x is reached.
        while (*q && !(*q >= '0' && *q <= '9') && *q != 'X' && *q != 'x') ++q;
        if (!*q) continue;  // marker with nothing parseable after it
        if (*q == 'X' || *q == 'x') {
            if (*(q + 1) >= '0' && *(q + 1) <= '9') {  // "Adreno X1-..." family
                if (800 > best) best = 800;
            }
            continue;  // "Xclipse" etc. is not Adreno-X
        }
        int  v        = 0;
        bool overflow = false;
        while (*q >= '0' && *q <= '9') {
            v = v * 10 + (*q - '0');
            ++q;
            // Bail out long before `int` could overflow on absurd digit runs.
            if (v > 100000) { overflow = true; break; }
        }
        // Adreno models are 3-digit; ignore API-version noise like "OpenCL 3.0".
        if (!overflow && v >= 100 && v > best) best = v;
    }
    return best;
}

bool is_adreno_6xx(const char * s) {
Expand All @@ -242,14 +256,48 @@ bool is_adreno_700plus(const char * s) {
return v >= 700;
}

// Vendor gate for Android GPU selection: reports whether either the device
// name or its description mentions "adreno" or "qualcomm" anywhere, comparing
// ASCII letters case-insensitively. A null `name` or `desc` is simply treated
// as not matching. No model number is required, so the bare OpenCL
// "QUALCOMM Adreno(TM)" string (for which parse_adreno_version returns -1)
// still passes. Case folding matters because capitalisation differs between
// backends: ggml-opencl reports CL_DEVICE_NAME ("QUALCOMM Adreno(TM)") while
// ggml-vulkan reports the Vulkan deviceName ("Adreno (TM) 740").
bool is_qualcomm_adreno(const char * name, const char * desc) {
    // Lowercase a single ASCII letter; every other byte is returned as-is.
    const auto fold = [](char c) -> char {
        return (c >= 'A' && c <= 'Z') ? char(c + 32) : c;
    };

    // Does `text` begin with `word`, ignoring ASCII case?
    const auto begins_with = [&fold](const char * text, const char * word) -> bool {
        for (; *word != '\0'; ++text, ++word) {
            if (*text == '\0' || fold(*text) != fold(*word)) {
                return false;
            }
        }
        return true;
    };

    // Does `word` occur at any offset inside `text`?
    const auto mentions = [&begins_with](const char * text, const char * word) -> bool {
        if (text == nullptr || word == nullptr) {
            return false;
        }
        for (const char * cursor = text; *cursor != '\0'; ++cursor) {
            if (begins_with(cursor, word)) {
                return true;
            }
        }
        return false;
    };

    const char * const vendor_tokens[] = { "adreno", "qualcomm" };
    for (const char * token : vendor_tokens) {
        if (mentions(name, token) || mentions(desc, token)) {
            return true;
        }
    }
    return false;
}

// Pick a GPU backend using the same tier policy as parakeet-cpp's
// `init_gpu_backend` / llm-llamacpp's BackendSelection: ggml-opencl
// is only used when an Adreno 700+ device is present (where its
// kernels are validated and faster than Vulkan); every other GPU
// (Vulkan, Metal, CUDA, Intel iGPU, ...) goes through the non-OpenCL
// preference. Adreno 6xx OpenCL is known broken (incorrect outputs)
// and is force-skipped unless the caller opts in via
// `TTS_CPP_ALLOW_ADRENO_6XX=1`.
//
// On Android the device walk is additionally gated to Qualcomm Adreno
// only: other Android GPU vendors are not validated and at least one
// (ARM Mali / Tensor) aborts the host process from inside graph
// compute, so they are skipped and the engine falls back to CPU.
// Desktop GPU vendors are unaffected.
//
// Routed exclusively through the ggml-backend registry
// (`ggml_backend_load_all` + `ggml_backend_dev_*`). No direct calls
Expand Down Expand Up @@ -292,6 +340,29 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
const char * reg_name = dev_reg_name(dev);
const bool is_opencl = reg_name && std::strcmp(reg_name, "OpenCL") == 0;

#if defined(__ANDROID__)
// Android GPU allowlist: only Qualcomm Adreno is validated for the
// tts-cpp GPU backends (OpenCL on Adreno 700+, Vulkan as the
// bring-up fallback). Other Android GPU vendors are not validated,
// and at least one (ARM Mali / Tensor) aborts the whole host
// process from inside ggml_backend_graph_compute via GGML_ASSERT ->
// ggml_abort(), which cannot be caught from C++. Skip non-Adreno
// devices so the policy falls through to CPU instead of risking a
// fatal abort on an unvalidated driver.
if (!is_qualcomm_adreno(name, desc)) {
if (verbose) {
fprintf(stderr,
"%s: Android GPU '%s' (%s) is not Qualcomm Adreno; "
"skipping (only Adreno is validated on Android; "
"falling through to CPU)\n",
log_prefix,
name ? name : "?",
desc ? desc : "?");
}
continue;
}
#endif

const int adreno_v = std::max(parse_adreno_version(name),
parse_adreno_version(desc));
if (adreno_v > max_adreno_version) max_adreno_version = adreno_v;
Expand Down Expand Up @@ -331,10 +402,11 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
// 1. Adreno 700+: prefer OpenCL (validated, faster than Vulkan
// on Snapdragon 8 Gen 2/3/4 etc.).
// 2. Anything else with a non-OpenCL GPU: prefer that
// (Adreno Vulkan on Android — non-Adreno is filtered out
// above; Metal on Apple; CUDA / Vulkan on Linux/Windows
// desktop).
// 3. Last resort: any other OpenCL device (e.g. desktop OpenCL,
// or Adreno OpenCL whose version string lacked a model number).
auto try_init = [&](const std::vector<Cand> & bucket) -> ggml_backend_t {
for (const Cand & c : bucket) {
ggml_backend_t b = ggml_backend_dev_init(c.dev, nullptr);
Expand Down
6 changes: 6 additions & 0 deletions tts-cpp/src/backend_selection.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,10 @@ int parse_adreno_version(const char * s);
bool is_adreno_6xx(const char * s);
bool is_adreno_700plus(const char * s);

// Vendor check (name OR description, ASCII case-insensitive): true for a
// Qualcomm Adreno GPU. Unlike parse_adreno_version it does not require a
// model number, so it also matches the bare OpenCL "QUALCOMM Adreno(TM)"
// string. Used to gate Android GPU selection to the only validated vendor.
bool is_qualcomm_adreno(const char * name, const char * desc);

} // namespace tts_cpp::detail
6 changes: 6 additions & 0 deletions tts-cpp/src/chatterbox_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ struct cli_params {
std::string tokens_file; // optional pre-tokenized speech tokens (skips T3)
std::string text; // input text for T3
std::string output; // legacy: speech-tokens output file (if set, write tokens)
std::string dump_mel_path; // optional: dump S3Gen intermediates (_mu/_step0_dxdt/mel) to .npy for debugging
// S3Gen + HiFT vocoder:
std::string s3gen_gguf; // enables full text → wav pipeline
std::string out_wav; // wav output path (requires --s3gen-gguf)
Expand Down Expand Up @@ -450,6 +451,7 @@ static void print_usage(const char * argv0) {
fprintf(stderr, " With --s3gen-gguf this is interpreted as *speech* tokens\n");
fprintf(stderr, " and the T3 step is skipped.\n");
fprintf(stderr, " --output PATH Write generated speech tokens to PATH (text mode).\n");
fprintf(stderr, " --dump-mel-path PATH Debug: dump S3Gen mel to PATH, encoder to PATH_mu.npy, CFM step0 to PATH_step0_dxdt.npy.\n");
fprintf(stderr, "\n");
fprintf(stderr, " --s3gen-gguf PATH Enables the full text -> wav pipeline (S3Gen + HiFT).\n");
fprintf(stderr, " --out PATH Output wav file when --s3gen-gguf is set.\n");
Expand Down Expand Up @@ -590,6 +592,7 @@ static bool parse_args(int argc, char ** argv, cli_params & params) {
else if (arg == "--text") { auto v = next("--text"); if (!v) return false; params.text = v; }
else if (arg == "--tokens-file") { auto v = next("--tokens-file"); if (!v) return false; params.tokens_file = v; }
else if (arg == "--output") { auto v = next("--output"); if (!v) return false; params.output = v; }
else if (arg == "--dump-mel-path") { auto v = next("--dump-mel-path"); if (!v) return false; params.dump_mel_path = v; }
else if (arg == "--s3gen-gguf") { auto v = next("--s3gen-gguf"); if (!v) return false; params.s3gen_gguf = v; }
else if (arg == "--out") { auto v = next("--out"); if (!v) return false; params.out_wav = v; }
else if (arg == "--ref-dir") { auto v = next("--ref-dir"); if (!v) return false; params.ref_dir = v; }
Expand Down Expand Up @@ -982,6 +985,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
opts.verbose = params.verbose;
opts.n_gpu_layers = params.n_gpu_layers;
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down Expand Up @@ -1265,6 +1269,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
// chunk; --cfm-steps falls in as the per-chunk default below
// (`stream_cfm_steps > 0 ? stream_cfm_steps : cfm_steps`).
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down Expand Up @@ -2063,6 +2068,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
// Streaming chunks honour --stream-cfm-steps with --cfm-steps as
// fallback when copts is set up further below.
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down
Loading
Loading