Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 76 additions & 24 deletions tts-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,55 @@ if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
endif()
endif()

# Android default backend stack: dynamic loading of Vulkan + OpenCL +
# per-arch CPU variants. Mirrors the same-repo sibling parakeet-cpp and
# the qvac llm-llamacpp Android config (see
# qvac-registry-vcpkg/ports/llama-cpp/portfile.cmake) so the tts-cpp
# Android prebuilds drop into the same `qvac__tts-ggml/` folder shape
# as the parakeet / llamacpp ones: a `.bare` module + sibling
# `lib<prefix>ggml-{vulkan,opencl,cpu-android_armv*_*}.so` files that
# `ggml_backend_load_all_from_path()` discovers at runtime.
#
# Selection at runtime is centralised in
# `tts_cpp::detail::init_gpu_backend()` (src/backend_selection.cpp):
# OpenCL when an Adreno 700+ device is present, Vulkan for every
# other GPU (non-Adreno, Adreno < 700, Mali, Xclipse, ...). No
# static GPU backend entry points are linked anywhere in libtts-cpp;
# the registry walk reaches the right backend in both
# GGML_BACKEND_DL=ON (Android prebuild) and GGML_BACKEND_DL=OFF
# (desktop dev) modes.
#
# Callers that have specific reasons to deviate (e.g. a desktop
# bring-up build that wants Vulkan only) can still override any of
# these at the cmake command line; we only set defaults that haven't
# already been provided.
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
    # Every option listed here is switched ON only when it has no cache
    # entry yet, so a value already supplied by the caller is left as is.
    #
    # GGML_VULKAN_DISABLE_COOPMAT / GGML_VULKAN_DISABLE_COOPMAT2 are part
    # of the list because ggml-vulkan's coopmat / coopmat2 shader compile
    # pulls in extensions that most Android Vulkan drivers don't expose;
    # the upstream llama Android build disables both for the same reason.
    foreach (_tts_android_default IN ITEMS
            GGML_BACKEND_DL
            GGML_CPU_ALL_VARIANTS
            GGML_CPU_REPACK
            GGML_VULKAN
            GGML_OPENCL
            GGML_VULKAN_DISABLE_COOPMAT
            GGML_VULKAN_DISABLE_COOPMAT2)
        if (NOT DEFINED CACHE{${_tts_android_default}})
            set(${_tts_android_default} ON CACHE BOOL "" FORCE)
        endif()
    endforeach()
    # Do not leave the loop helper behind for the rest of the file.
    unset(_tts_android_default)
endif()

# Two related workarounds for clang-cl / MSVC builds on Windows. Both
# come from msys2 sneaking GCC-flavoured libraries onto CMake's search
# paths and being mismatched against MSVC-compiled translation units.
Expand Down Expand Up @@ -161,33 +210,28 @@ if (MSVC)
add_compile_definitions(_USE_MATH_DEFINES _CRT_SECURE_NO_WARNINGS)
endif()

# Legacy interface library kept for export-set compatibility (it is
# still part of `install(EXPORT tts-cppTargets)` below and downstream
# `find_package(tts-cpp)` consumers list it as a link dep). Body
# intentionally empty: tts-cpp now routes every backend decision
# through the ggml-backend registry
# (`ggml_backend_load_all` + `ggml_backend_dev_*`, see
# `init_gpu_backend()` / `init_cpu_backend()` / `init_blas_backend()`
# in src/backend_selection.cpp) and does NOT call any
# `ggml_backend_<backend>_init` / `ggml_backend_is_<backend>` entry
# point directly. The `GGML_USE_VULKAN` / `GGML_USE_OPENCL` /
# `GGML_USE_METAL` / `GGML_USE_CUDA` / `GGML_USE_BLAS` compile defines
# that used to live here were only consumed by `#ifdef` cascades that
# called those static entry points; with the registry-only design
# they're dead, and shipping them would falsely advertise a static
# backend dependency that the GGML_BACKEND_DL=ON Android/Linux builds
# explicitly do not have (their backends live in separately-loadable
# `.so` files that are dlopen()'d by `ggml_backend_load_all_from_path`
# at runtime). Mirrors parakeet-cpp's `parakeet-backend-defs`.
#
# A target that still needs one of those macros for its own direct
# backend call site must define it privately on that target, the way
# test-metal-ops does with GGML_USE_METAL.
add_library(tts-cpp-backend-defs INTERFACE)

set(TTS_CPP_LIB_SOURCES
src/backend_selection.cpp
src/main.cpp
src/chatterbox_cli.cpp
src/gpt2_bpe.cpp
Expand Down Expand Up @@ -594,7 +638,15 @@ if (TTS_CPP_BUILD_TESTS)
tts_cpp_apply_ccache(test-metal-ops)
# Metal-only kernel parity check. Useful only when built with
# -DGGML_METAL=ON; skipped on CI fleets without Metal via `ctest -LE gpu`.
# GGML_USE_METAL is supplied locally here (rather than via
# tts-cpp-backend-defs) because the library itself no longer
# consumes the macro -- every #ifdef GGML_USE_<X> in src/ was
# removed alongside the registry-only refactor. The test still
# uses the macro to gate its direct ggml_backend_metal_init()
# call site (it's exercising the Metal-backend implementation
# directly, not going through tts-cpp's backend selection).
if (GGML_METAL)
# PRIVATE keeps the define on test-metal-ops' own sources only.
target_compile_definitions(test-metal-ops PRIVATE GGML_USE_METAL)
# Registered under the "gpu" label; presumably this is what lets
# `ctest -LE gpu` exclude the test on machines without Metal.
tts_cpp_register_test(test-metal-ops LABEL "gpu")
endif()

Expand Down
49 changes: 47 additions & 2 deletions tts-cpp/include/tts-cpp/chatterbox/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,57 @@ struct EngineOptions {
std::string voice_dir;

// Backend selection. n_gpu_layers > 0 enables the first available
// GPU backend (CUDA → Metal → Vulkan → OpenCL in build order), falling
// back to the CPU backend when none is compiled in or initialisation fails.
// GPU backend via the Adreno-tier policy: Adreno 700+ -> OpenCL,
// every other GPU (Vulkan on non-Adreno Android, Metal on Apple,
// CUDA on Linux/Windows desktop, Mali iGPU via Vulkan, ...) -> the
// non-OpenCL preference. Adreno 6xx OpenCL is force-skipped (broken
// kernels) unless `TTS_CPP_ALLOW_ADRENO_6XX=1` is set in the env.
// Falls back to the CPU backend when no GPU was requested, none is
// registered, or every candidate refused init.
// The exact per-layer split is not used today; any positive value
// moves the whole model to the GPU.
int n_gpu_layers = 0;

// Directory to scan for dynamically-loaded ggml backends
// (`libspeech-ggml-vulkan.so`, `libspeech-ggml-opencl.so`,
// `libspeech-ggml-cpu-android_armv8.2_1.so`, ...). Forwarded to
// `ggml_backend_load_all_from_path()` on the first Engine
// construction in the process; subsequent constructions reuse the
// already-populated registry.
//
// Leave empty to fall back to ggml's default search path
// (`ggml_backend_load_all()`), which walks compile-time defaults
// (`$EXE_DIR`, `LD_LIBRARY_PATH`, ...). Embedded host applications
// built with `GGML_BACKEND_DL=ON` (the Android / Linux non-Apple
// default; see CMakeLists.txt) should pass an explicit dir
// because the .so files ship next to the host's binary in a
// platform-specific subfolder rather than on the system loader's
// path.
//
// No-op on builds where ggml is statically linked
// (`GGML_BACKEND_DL=OFF`, e.g. desktop dev cmake builds and the
// Apple xcframework). On those, every backend is registered at
// constructor time from inside libggml and no filesystem scan
// takes place.
std::string backends_dir;

// Sets `$GGML_OPENCL_CACHE_DIR` before the first backend init so
// ggml-opencl persists `clCreateProgramWithBinary` blobs across
// process restarts (see the program-binary-cache patch on
// qvac-ext-ggml@speech). Strongly recommended on Android where
// the cold `clBuildProgram` cost dominates first-utterance
// latency; pass a writable per-app directory (typically the
// app's `cacheDir` from the host platform).
//
// Honoured only on `__ANDROID__` builds; ignored elsewhere
// (desktop OpenCL platforms don't ship the binary-cache patch
// and would otherwise pollute the user's tmpdir).
//
// Leave empty to keep the existing `$GGML_OPENCL_CACHE_DIR` env
// value (or no cache at all). Wrapper scripts that already
// export the env take precedence.
std::string opencl_cache_dir;

// 0 = std::thread::hardware_concurrency() (capped at 4 by default).
int n_threads = 0;

Expand Down
27 changes: 27 additions & 0 deletions tts-cpp/include/tts-cpp/supertonic/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,33 @@ struct EngineOptions {
int n_threads = 0;
int n_gpu_layers = 0;

// Directory to scan for dynamically-loaded ggml backends
// (`libspeech-ggml-vulkan.so`, `libspeech-ggml-opencl.so`,
// `libspeech-ggml-cpu-android_armv8.2_1.so`, ...). Forwarded to
// `ggml_backend_load_all_from_path()` on the first Engine
// construction in the process; subsequent constructions reuse the
// already-populated registry.
//
// Leave empty to fall back to ggml's default search path
// (`ggml_backend_load_all()`). Embedded host applications built
// with `GGML_BACKEND_DL=ON` (the Android / Linux non-Apple
// default; see CMakeLists.txt) should pass an explicit dir: the
// .so files ship next to the host's binary in a per-module
// folder, so discovery should not rely on `LD_LIBRARY_PATH` /
// `dlopen()` heuristics. No-op on `GGML_BACKEND_DL=OFF`
// (static-link) builds.
std::string backends_dir;

// Sets `$GGML_OPENCL_CACHE_DIR` before the first backend init so
// ggml-opencl persists `clCreateProgramWithBinary` blobs across
// process restarts. Strongly recommended on Android where the
// cold `clBuildProgram` cost dominates first-utterance latency;
// pass a writable per-app directory (typically the app's
// `cacheDir` from the host platform).
//
// Honoured only on `__ANDROID__` builds; ignored elsewhere.
std::string opencl_cache_dir;

// Optional path to a .npy file containing the initial noise tensor of
// shape [1, latent_channels, latent_len] (float32). When provided,
// latent_len is taken from the npy file (overriding the duration-
Expand Down
Loading
Loading