Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 97 additions & 25 deletions tts-cpp/src/backend_selection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,24 +212,38 @@ void ensure_backends_loaded() {
// reach the same decision on the same hardware.
int parse_adreno_version(const char * s) {
if (!s) return -1;
const char * p = std::strstr(s, "Adreno");
if (!p) p = std::strstr(s, "adreno");
if (!p) return -1;
p += 6; // strlen("Adreno") == strlen("adreno") == 6
while (*p && !(*p >= '0' && *p <= '9') && *p != 'X' && *p != 'x') ++p;
if (!*p) return -1;
if (*p == 'X' || *p == 'x') {
++p;
if (*p < '0' || *p > '9') return -1; // "Xclipse" etc. is not Adreno-X
return 800;
}
int v = 0;
while (*p >= '0' && *p <= '9') {
v = v * 10 + (*p - '0');
++p;
if (v > 100000) return -1;
// Scan EVERY "Adreno"/"adreno" marker and keep the largest plausible
// (>= 100, i.e. real 3-digit model) version found. Some OpenCL device
// strings embed the API version before the model number, e.g.
// "QUALCOMM Adreno(TM) (OpenCL 3.0 Adreno(TM) 740)": parsing only the
// first marker yields 3 (from "OpenCL 3.0") and mis-tiers the GPU below
// Vulkan; the second "Adreno 740" marker recovers the real version.
int best = -1;
for (const char * p = s; *p; ++p) {
if (std::strncmp(p, "Adreno", 6) != 0 &&
std::strncmp(p, "adreno", 6) != 0) {
continue;
}
const char * q = p + 6; // strlen("Adreno") == strlen("adreno") == 6
while (*q && !(*q >= '0' && *q <= '9') && *q != 'X' && *q != 'x') ++q;
if (!*q) continue;
if (*q == 'X' || *q == 'x') {
if (*(q + 1) >= '0' && *(q + 1) <= '9') { // "Adreno X1-..." family
if (800 > best) best = 800;
}
continue; // "Xclipse" etc. is not Adreno-X
}
int v = 0;
bool overflow = false;
while (*q >= '0' && *q <= '9') {
v = v * 10 + (*q - '0');
++q;
if (v > 100000) { overflow = true; break; }
}
// Adreno models are 3-digit; ignore API-version noise like "OpenCL 3.0".
if (!overflow && v >= 100 && v > best) best = v;
}
return v;
return best;
}

bool is_adreno_6xx(const char * s) {
Expand All @@ -242,14 +256,48 @@ bool is_adreno_700plus(const char * s) {
return v >= 700;
}

// Vendor-level test for a Qualcomm Adreno GPU.
//
// Returns true when either `name` or `desc` contains the substring
// "adreno" or the substring "qualcomm", compared ASCII case-insensitively.
// A null `name` or `desc` is treated as "no match" for that argument.
//
// This differs from parse_adreno_version, which only reports a version
// when a model number of at least 100 follows the marker and therefore
// yields -1 for the bare OpenCL string "QUALCOMM Adreno(TM)". No model
// number is required here; the function exists to gate Android GPU
// selection by vendor. Case folding is needed because capitalisation
// differs between backends: ggml-opencl reports CL_DEVICE_NAME
// ("QUALCOMM Adreno(TM)") while ggml-vulkan reports the Vulkan deviceName
// ("Adreno (TM) 740").
bool is_qualcomm_adreno(const char * name, const char * desc) {
    // Fold a single ASCII upper-case letter to lower case; every other
    // byte is returned unchanged (deliberately locale-independent).
    const auto ascii_lower = [](char c) -> char {
        return (c >= 'A' && c <= 'Z') ? char(c + 32) : c;
    };

    // Case-insensitive substring search over NUL-terminated strings.
    const auto has_token = [&ascii_lower](const char * text, const char * token) -> bool {
        if (text == nullptr || token == nullptr) {
            return false;
        }
        for (const char * start = text; *start != '\0'; ++start) {
            const char * t = start;
            const char * k = token;
            for (; *t != '\0' && *k != '\0'; ++t, ++k) {
                if (ascii_lower(*t) != ascii_lower(*k)) {
                    break;
                }
            }
            // The whole token was consumed, so it matched at `start`.
            if (*k == '\0') {
                return true;
            }
        }
        return false;
    };

    static const char * const vendor_tokens[] = { "adreno", "qualcomm" };
    for (const char * token : vendor_tokens) {
        if (has_token(name, token) || has_token(desc, token)) {
            return true;
        }
    }
    return false;
}

// Pick a GPU backend using the same tier policy as parakeet-cpp's
// `init_gpu_backend` / llm-llamacpp's BackendSelection: ggml-opencl
// is only used when an Adreno 700+ device is present (where its
// kernels are validated and faster than Vulkan); every other GPU
// (Vulkan, Metal, CUDA, Mali, Intel iGPU, ...) goes through the
// non-OpenCL preference. Adreno 6xx OpenCL is known broken
// (incorrect outputs) and is force-skipped unless the caller opts
// in via `TTS_CPP_ALLOW_ADRENO_6XX=1`.
// (Vulkan, Metal, CUDA, Intel iGPU, ...) goes through the non-OpenCL
// preference. Adreno 6xx OpenCL is known broken (incorrect outputs)
// and is force-skipped unless the caller opts in via
// `TTS_CPP_ALLOW_ADRENO_6XX=1`.
//
// On Android the device walk is additionally gated to Qualcomm Adreno
// only: other Android GPU vendors are not validated and at least one
// (ARM Mali / Tensor) aborts the host process from inside graph
// compute, so they are skipped and the engine falls back to CPU.
// Desktop GPU vendors are unaffected.
//
// Routed exclusively through the ggml-backend registry
// (`ggml_backend_load_all` + `ggml_backend_dev_*`). No direct calls
Expand Down Expand Up @@ -292,6 +340,29 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
const char * reg_name = dev_reg_name(dev);
const bool is_opencl = reg_name && std::strcmp(reg_name, "OpenCL") == 0;

#if defined(__ANDROID__)
// Android GPU allowlist: only Qualcomm Adreno is validated for the
// tts-cpp GPU backends (OpenCL on Adreno 700+, Vulkan as the
// bring-up fallback). Other Android GPU vendors are not validated,
// and at least one (ARM Mali / Tensor) aborts the whole host
// process from inside ggml_backend_graph_compute via GGML_ASSERT ->
// ggml_abort(), which cannot be caught from C++. Skip non-Adreno
// devices so the policy falls through to CPU instead of risking a
// fatal abort on an unvalidated driver.
if (!is_qualcomm_adreno(name, desc)) {
if (verbose) {
fprintf(stderr,
"%s: Android GPU '%s' (%s) is not Qualcomm Adreno; "
"skipping (only Adreno is validated on Android; "
"falling through to CPU)\n",
log_prefix,
name ? name : "?",
desc ? desc : "?");
}
continue;
}
#endif

const int adreno_v = std::max(parse_adreno_version(name),
parse_adreno_version(desc));
if (adreno_v > max_adreno_version) max_adreno_version = adreno_v;
Expand Down Expand Up @@ -331,10 +402,11 @@ ggml_backend_t init_gpu_backend(int n_gpu_layers,
// 1. Adreno 700+: prefer OpenCL (validated, faster than Vulkan
// on Snapdragon 8 Gen 2/3/4 etc.).
// 2. Anything else with a non-OpenCL GPU: prefer that
// (Vulkan on all non-Adreno Android, Metal on Apple, CUDA
// on Linux/Windows desktop, Mali iGPU via Vulkan, ...).
// 3. Last resort: any other OpenCL device (e.g. desktop OpenCL
// or non-Adreno mobile when no Vulkan is registered).
// (Adreno Vulkan on Android — non-Adreno is filtered out
// above; Metal on Apple; CUDA / Vulkan on Linux/Windows
// desktop).
// 3. Last resort: any other OpenCL device (e.g. desktop OpenCL,
// or Adreno OpenCL whose version string lacked a model number).
auto try_init = [&](const std::vector<Cand> & bucket) -> ggml_backend_t {
for (const Cand & c : bucket) {
ggml_backend_t b = ggml_backend_dev_init(c.dev, nullptr);
Expand Down
6 changes: 6 additions & 0 deletions tts-cpp/src/backend_selection.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,10 @@ int parse_adreno_version(const char * s);
bool is_adreno_6xx(const char * s);
bool is_adreno_700plus(const char * s);

// Vendor check (name OR description, ASCII case-insensitive): true for a
// Qualcomm Adreno GPU. Unlike parse_adreno_version it does not require a
// model number, so it also matches the bare OpenCL "QUALCOMM Adreno(TM)"
// string. Used to gate Android GPU selection to the only validated vendor.
bool is_qualcomm_adreno(const char * name, const char * desc);

} // namespace tts_cpp::detail
6 changes: 6 additions & 0 deletions tts-cpp/src/chatterbox_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ struct cli_params {
std::string tokens_file; // optional pre-tokenized speech tokens (skips T3)
std::string text; // input text for T3
std::string output; // legacy: speech-tokens output file (if set, write tokens)
std::string dump_mel_path; // optional: dump S3Gen intermediates (_mu/_step0_dxdt/mel) to .npy for debugging
// S3Gen + HiFT vocoder:
std::string s3gen_gguf; // enables full text → wav pipeline
std::string out_wav; // wav output path (requires --s3gen-gguf)
Expand Down Expand Up @@ -450,6 +451,7 @@ static void print_usage(const char * argv0) {
fprintf(stderr, " With --s3gen-gguf this is interpreted as *speech* tokens\n");
fprintf(stderr, " and the T3 step is skipped.\n");
fprintf(stderr, " --output PATH Write generated speech tokens to PATH (text mode).\n");
fprintf(stderr, " --dump-mel-path PATH Debug: dump S3Gen mel to PATH, encoder to PATH_mu.npy, CFM step0 to PATH_step0_dxdt.npy.\n");
fprintf(stderr, "\n");
fprintf(stderr, " --s3gen-gguf PATH Enables the full text -> wav pipeline (S3Gen + HiFT).\n");
fprintf(stderr, " --out PATH Output wav file when --s3gen-gguf is set.\n");
Expand Down Expand Up @@ -590,6 +592,7 @@ static bool parse_args(int argc, char ** argv, cli_params & params) {
else if (arg == "--text") { auto v = next("--text"); if (!v) return false; params.text = v; }
else if (arg == "--tokens-file") { auto v = next("--tokens-file"); if (!v) return false; params.tokens_file = v; }
else if (arg == "--output") { auto v = next("--output"); if (!v) return false; params.output = v; }
else if (arg == "--dump-mel-path") { auto v = next("--dump-mel-path"); if (!v) return false; params.dump_mel_path = v; }
else if (arg == "--s3gen-gguf") { auto v = next("--s3gen-gguf"); if (!v) return false; params.s3gen_gguf = v; }
else if (arg == "--out") { auto v = next("--out"); if (!v) return false; params.out_wav = v; }
else if (arg == "--ref-dir") { auto v = next("--ref-dir"); if (!v) return false; params.ref_dir = v; }
Expand Down Expand Up @@ -982,6 +985,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
opts.verbose = params.verbose;
opts.n_gpu_layers = params.n_gpu_layers;
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down Expand Up @@ -1265,6 +1269,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
// chunk; --cfm-steps falls in as the per-chunk default below
// (`stream_cfm_steps > 0 ? stream_cfm_steps : cfm_steps`).
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down Expand Up @@ -2063,6 +2068,7 @@ int tts_cpp_cli_main(int argc, char ** argv) {
// Streaming chunks honour --stream-cfm-steps with --cfm-steps as
// fallback when copts is set up further below.
opts.cfm_steps = params.cfm_steps;
opts.dump_mel_path = params.dump_mel_path;
opts.cfm_f16_kv_attn = params.cfm_f16_kv_attn;
if (!params.reference_audio.empty()) {
if (!compute_prompt_feat_native(params.reference_audio, params.s3gen_gguf,
Expand Down
Loading
Loading