tetherto · GustavoA1604 · May 20, 2026 · May 20, 2026
@@ -70,6 +70,55 @@ if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
     endif()
 endif()
 
+# Android default backend stack: dynamic loading of Vulkan + OpenCL +
+# per-arch CPU variants. Mirrors the same-repo sibling parakeet-cpp and
+# the qvac llm-llamacpp Android config (see
+# qvac-registry-vcpkg/ports/llama-cpp/portfile.cmake) so the tts-cpp
+# Android prebuilds drop into the same `qvac__tts-ggml/` folder shape
+# as the parakeet / llamacpp ones: a `.bare` module + sibling
+# `lib<prefix>ggml-{vulkan,opencl,cpu-android_armv*_*}.so` files that
+# `ggml_backend_load_all_from_path()` discovers at runtime.
+#
+# Selection at runtime is centralised in
+# `tts_cpp::detail::init_gpu_backend()` (src/backend_selection.cpp):
+# OpenCL when an Adreno 700+ device is present, Vulkan for every
+# other GPU (non-Adreno, Adreno < 700, Mali, Xclipse, ...). No
+# static GPU backend entry points are linked anywhere in libtts-cpp;
+# the registry walk reaches the right backend in both
+# GGML_BACKEND_DL=ON (Android prebuild) and GGML_BACKEND_DL=OFF
+# (desktop dev) modes.
+#
+# Callers that have specific reasons to deviate (e.g. a bring-up
+# build that wants Vulkan only) can still override any of these at
+# the cmake command line; we only set defaults that haven't already
+# been provided.
+# Android-only defaults for the ggml backend stack. Each option is
+# seeded only when no cache entry of that name exists yet, so a value
+# passed as -D<OPTION>=... on the cmake command line is left untouched.
+# Because of that guard, FORCE is not needed: a plain cache set() only
+# creates entries that are missing and never overwrites an existing one.
+if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+    if (NOT DEFINED CACHE{GGML_BACKEND_DL})
+        set(GGML_BACKEND_DL ON CACHE BOOL
+            "tts-cpp Android default: dynamically loaded ggml backends")
+    endif()
+    if (NOT DEFINED CACHE{GGML_CPU_ALL_VARIANTS})
+        set(GGML_CPU_ALL_VARIANTS ON CACHE BOOL
+            "tts-cpp Android default: per-arch ggml CPU backend variants")
+    endif()
+    if (NOT DEFINED CACHE{GGML_CPU_REPACK})
+        set(GGML_CPU_REPACK ON CACHE BOOL
+            "tts-cpp Android default: GGML_CPU_REPACK enabled")
+    endif()
+    if (NOT DEFINED CACHE{GGML_VULKAN})
+        set(GGML_VULKAN ON CACHE BOOL
+            "tts-cpp Android default: ggml Vulkan backend enabled")
+    endif()
+    if (NOT DEFINED CACHE{GGML_OPENCL})
+        set(GGML_OPENCL ON CACHE BOOL
+            "tts-cpp Android default: ggml OpenCL backend enabled")
+    endif()
+    # ggml-vulkan's coopmat / coopmat2 shader compile pulls in
+    # extensions that most Android Vulkan drivers don't expose; the
+    # upstream llama Android build disables both for the same reason.
+    if (NOT DEFINED CACHE{GGML_VULKAN_DISABLE_COOPMAT})
+        set(GGML_VULKAN_DISABLE_COOPMAT ON CACHE BOOL
+            "tts-cpp Android default: skip ggml-vulkan coopmat shaders")
+    endif()
+    if (NOT DEFINED CACHE{GGML_VULKAN_DISABLE_COOPMAT2})
+        set(GGML_VULKAN_DISABLE_COOPMAT2 ON CACHE BOOL
+            "tts-cpp Android default: skip ggml-vulkan coopmat2 shaders")
+    endif()
+endif()
+
 # Two related workarounds for clang-cl / MSVC builds on Windows.  Both
 # come from msys2 sneaking GCC-flavoured libraries onto CMake's search
 # paths and being mismatched against MSVC-compiled translation units.
@@ -161,33 +210,28 @@ if (MSVC)
     add_compile_definitions(_USE_MATH_DEFINES _CRT_SECURE_NO_WARNINGS)
 endif()
 
-# INTERFACE library that holds the GGML_USE_<BACKEND> compile defines
-# every TU that includes ggml.h needs to dispatch correctly on the
-# enabled backend.  The tts-cpp library AND any test executable that
-# recompiles src/chatterbox_tts.cpp / src/main.cpp from source (i.e.
-# bypasses the tts-cpp link) must link against this; otherwise the
-# #ifdef GGML_USE_<BACKEND> branches inside those TUs evaluate as
-# undefined and the GPU code paths get silently compiled out of the
-# test executable, even when the parent build did enable the backend.
-# Mirrors parakeet-cpp's parakeet-backend-defs INTERFACE lib.
+# Legacy interface library kept for export-set compatibility (it is
+# still part of `install(EXPORT tts-cppTargets)` below and downstream
+# `find_package(tts-cpp)` consumers list it as a link dep). Body
+# intentionally empty: tts-cpp now routes every backend decision
+# through the ggml-backend registry
+# (`ggml_backend_load_all` + `ggml_backend_dev_*`, see
+# `init_gpu_backend()` / `init_cpu_backend()` / `init_blas_backend()`
+# in src/backend_selection.cpp) and does NOT call any
+# `ggml_backend_<backend>_init` / `ggml_backend_is_<backend>` entry
+# point directly. The `GGML_USE_VULKAN` / `GGML_USE_OPENCL` /
+# `GGML_USE_METAL` / `GGML_USE_CUDA` / `GGML_USE_BLAS` compile defines
+# that used to live here were only consumed by `#ifdef` cascades that
+# called those static entry points; with the registry-only design
+# they're dead, and shipping them would falsely advertise a static
+# backend dependency that the GGML_BACKEND_DL=ON Android/Linux builds
+# explicitly do not have (their backends live in separately-loadable
+# `.so` files that are dlopen()'d by `ggml_backend_load_all_from_path`
+# at runtime). Mirrors parakeet-cpp's `parakeet-backend-defs`.
 add_library(tts-cpp-backend-defs INTERFACE)
-if (GGML_CUDA)
-    target_compile_definitions(tts-cpp-backend-defs INTERFACE GGML_USE_CUDA)
-endif()
-if (GGML_METAL)
-    target_compile_definitions(tts-cpp-backend-defs INTERFACE GGML_USE_METAL)
-endif()
-if (GGML_VULKAN)
-    target_compile_definitions(tts-cpp-backend-defs INTERFACE GGML_USE_VULKAN)
-endif()
-if (GGML_BLAS)
-    target_compile_definitions(tts-cpp-backend-defs INTERFACE GGML_USE_BLAS)
-endif()
-if (GGML_OPENCL)
-    target_compile_definitions(tts-cpp-backend-defs INTERFACE GGML_USE_OPENCL)
-endif()
 
 set(TTS_CPP_LIB_SOURCES
+    src/backend_selection.cpp
     src/main.cpp
     src/chatterbox_cli.cpp
     src/gpt2_bpe.cpp
@@ -594,7 +638,15 @@ if (TTS_CPP_BUILD_TESTS)
     tts_cpp_apply_ccache(test-metal-ops)
     # Metal-only kernel parity check.  Useful only when built with
     # -DGGML_METAL=ON; skipped on CI fleets without Metal via `ctest -LE gpu`.
+    # GGML_USE_METAL is supplied locally here (rather than via
+    # tts-cpp-backend-defs) because the library itself no longer
+    # consumes the macro -- every #ifdef GGML_USE_<X> in src/ was
+    # removed alongside the registry-only refactor. The test still
+    # uses the macro to gate its direct ggml_backend_metal_init()
+    # call site (it's exercising the Metal-backend implementation
+    # directly, not going through tts-cpp's backend selection).
     if (GGML_METAL)
+        target_compile_definitions(test-metal-ops PRIVATE GGML_USE_METAL)
         tts_cpp_register_test(test-metal-ops LABEL "gpu")
     endif()
 

@@ -75,12 +75,57 @@ struct EngineOptions {
     std::string voice_dir;
 
     // Backend selection.  n_gpu_layers > 0 enables the first available
-    // GPU backend (CUDA → Metal → Vulkan → OpenCL in build order), falling
-    // back to the CPU backend when none is compiled in or initialisation fails.
+    // GPU backend via the Adreno-tier policy: Adreno 700+ -> OpenCL;
+    // every other GPU -> the preferred non-OpenCL backend (Vulkan on
+    // non-Adreno Android GPUs such as Mali, Metal on Apple, CUDA on
+    // Linux/Windows desktop, ...). Adreno 6xx OpenCL is force-skipped
+    // (broken kernels) unless `TTS_CPP_ALLOW_ADRENO_6XX=1` is set in the env.
+    // Falls back to the CPU backend when no GPU was requested, none is
+    // registered, or every candidate refused init.
     // The exact per-layer split is not used today; any positive value
     // moves the whole model to the GPU.
     int n_gpu_layers = 0;
 
+    // Directory to scan for dynamically-loaded ggml backends
+    // (`libspeech-ggml-vulkan.so`, `libspeech-ggml-opencl.so`,
+    // `libspeech-ggml-cpu-android_armv8.2_1.so`, ...). Forwarded to
+    // `ggml_backend_load_all_from_path()` on the first Engine
+    // construction in the process; subsequent constructions reuse the
+    // already-populated registry.
+    //
+    // Leave empty to fall back to ggml's default search path
+    // (`ggml_backend_load_all()`), which walks compile-time defaults
+    // (`$EXE_DIR`, `LD_LIBRARY_PATH`, ...). Embedded host applications
+    // built with `GGML_BACKEND_DL=ON` (the Android / Linux non-Apple
+    // default; see CMakeLists.txt) should pass an explicit dir
+    // because the .so files ship next to the host's binary in a
+    // platform-specific subfolder rather than on the system loader's
+    // path.
+    //
+    // No-op on builds where ggml is statically linked
+    // (`GGML_BACKEND_DL=OFF`, e.g. desktop dev cmake builds and the
+    // Apple xcframework). On those, every backend is registered at
+    // constructor time from inside libggml and no filesystem scan
+    // takes place.
+    std::string backends_dir;
+
+    // Sets `$GGML_OPENCL_CACHE_DIR` before the first backend init so
+    // ggml-opencl persists `clCreateProgramWithBinary` blobs across
+    // process restarts (see the program-binary-cache patch on
+    // qvac-ext-ggml@speech). Strongly recommended on Android where
+    // the cold `clBuildProgram` cost dominates first-utterance
+    // latency; pass a writable per-app directory (typically the
+    // app's `cacheDir` from the host platform).
+    //
+    // Honoured only on `__ANDROID__` builds; ignored elsewhere
+    // (desktop OpenCL platforms don't ship the binary-cache patch
+    // and would otherwise pollute the user's tmpdir).
+    //
+    // Leave empty to keep the existing `$GGML_OPENCL_CACHE_DIR` env
+    // value (or no cache at all). Wrapper scripts that already
+    // export the env take precedence.
+    std::string opencl_cache_dir;
+
     // 0 = std::thread::hardware_concurrency() (capped at 4 by default).
     int n_threads = 0;
 

@@ -56,6 +56,33 @@ struct EngineOptions {
     int   n_threads     = 0;
     int   n_gpu_layers  = 0;
 
+    // Directory to scan for dynamically-loaded ggml backends
+    // (`libspeech-ggml-vulkan.so`, `libspeech-ggml-opencl.so`,
+    // `libspeech-ggml-cpu-android_armv8.2_1.so`, ...). Forwarded to
+    // `ggml_backend_load_all_from_path()` on the first Engine
+    // construction in the process; subsequent constructions reuse the
+    // already-populated registry.
+    //
+    // Leave empty to fall back to ggml's default search path
+    // (`ggml_backend_load_all()`). Embedded host applications built
+    // with `GGML_BACKEND_DL=ON` (the Android / Linux non-Apple
+    // default; see CMakeLists.txt) should pass an explicit dir so the
+    // .so files can ship next to the host's binary in a per-module
+    // folder and be found without relying on `LD_LIBRARY_PATH` /
+    // `dlopen()` heuristics. No-op on `GGML_BACKEND_DL=OFF`
+    // (static-link) builds.
+    std::string backends_dir;
+
+    // Sets `$GGML_OPENCL_CACHE_DIR` before the first backend init so
+    // ggml-opencl persists `clCreateProgramWithBinary` blobs across
+    // process restarts. Strongly recommended on Android where the
+    // cold `clBuildProgram` cost dominates first-utterance latency;
+    // pass a writable per-app directory (typically the app's
+    // `cacheDir` from the host platform).
+    //
+    // Honoured only on `__ANDROID__` builds; ignored elsewhere.
+    std::string opencl_cache_dir;
+
     // Optional path to a .npy file containing the initial noise tensor of
     // shape [1, latent_channels, latent_len] (float32).  When provided,
     // latent_len is taken from the npy file (overriding the duration-