Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,24 @@ if (TTS_CPP_BUILD_TESTS)
target_link_libraries(test-streaming PRIVATE ggml)
target_include_directories(test-streaming PRIVATE ggml/include src include)

# CPU-side persistent-cache validation (QVAC-18422).
# Exercises the time_mlp / time_emb / cfm_estimator / weight_mirror
# caches that amortise per-synth overhead on the multilingual CPU
# path. Compiles chatterbox_tts.cpp directly into the test executable
# so it can reach the internal test-hook entrypoints.
add_executable(test-cpu-caches
src/test_cpu_caches.cpp
src/chatterbox_tts.cpp)
target_link_libraries(test-cpu-caches PRIVATE ggml)
target_include_directories(test-cpu-caches PRIVATE ggml/include src include)

# T3 step-graph cache validation (QVAC-18422 round 4). Links
# against the full tts-cpp library so it gets t3_mtl.cpp's
# cached eval_step_mtl alongside the test-hook entrypoints.
add_executable(test-t3-caches src/test_t3_caches.cpp)
target_link_libraries(test-t3-caches PRIVATE tts-cpp ggml)
target_include_directories(test-t3-caches PRIVATE ggml/include src include)

add_executable(test-metal-ops src/test_metal_ops.cpp)
target_link_libraries(test-metal-ops PRIVATE ggml)
target_include_directories(test-metal-ops PRIVATE ggml/include src)
Expand Down
617 changes: 617 additions & 0 deletions PROGRESS.md

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions src/chatterbox_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,12 @@ int tts_cpp_cli_main(int argc, char ** argv) {
tts_cpp::chatterbox::detail::t3_stack_unregister(
model.buffer_stack, model.ctx_stack);
}
// QVAC-18422 round 4: drop the T3 step-graph cache
// BEFORE freeing the backend. The cache holds
// gallocators that carry backend references; freeing
// them against a dead backend would assert inside the
// ggml-metal / ggml-vulkan / ggml-cuda dylib finalisers.
tts_cpp::chatterbox::detail::t3_release_caches();
ggml_backend_buffer_free(model.buffer_w);
ggml_backend_buffer_free(model.buffer_kv);
if (model.buffer_stack) ggml_backend_buffer_free(model.buffer_stack);
Expand Down Expand Up @@ -2332,6 +2338,9 @@ int tts_cpp_cli_main(int argc, char ** argv) {
(long long)t3_total_ms, t3_tokens_total);

ggml_gallocr_free(allocr);
// QVAC-18422 round 4: drop T3 step-graph cache BEFORE freeing
// the backend (gallocators in cached entries reference it).
tts_cpp::chatterbox::detail::t3_release_caches();
ggml_backend_buffer_free(model.buffer_w);
ggml_backend_buffer_free(model.buffer_kv);
if (model.buffer_override) ggml_backend_buffer_free(model.buffer_override);
Expand Down
5 changes: 5 additions & 0 deletions src/chatterbox_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ struct Engine::Impl {
if (model.buffer_stack || model.ctx_stack) {
t3_stack_unregister(model.buffer_stack, model.ctx_stack);
}
// QVAC-18422 round 4: drop the T3 step-graph cache BEFORE
// freeing the backend. Cached gallocators carry backend
// references; freeing them against a dead backend asserts
// inside the GPU-backend dylib finalisers.
tts_cpp::chatterbox::detail::t3_release_caches();
if (model.buffer_w) { ggml_backend_buffer_free(model.buffer_w); model.buffer_w = nullptr; }
if (model.buffer_kv) { ggml_backend_buffer_free(model.buffer_kv); model.buffer_kv = nullptr; }
if (model.buffer_stack) { ggml_backend_buffer_free(model.buffer_stack); model.buffer_stack = nullptr; }
Expand Down
10 changes: 10 additions & 0 deletions src/chatterbox_t3_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,16 @@ bool eval_step_mtl(
std::vector<float> & logits_cond_out,
std::vector<float> & logits_uncond_out);

// Release every persistent T3-side cache (currently the round-4
// step-graph cache). Idempotent.
//
// Production callers (CLI free_t3 lambda, Engine::Impl::free_model)
// MUST call this BEFORE `ggml_backend_free(model.backend)` because
// the cached gallocators carry backend references; freeing them
// against a freed backend would assert inside ggml-metal /
// ggml-vulkan / ggml-cuda dylib finalisers.
//
// NOTE(review): this takes no model/backend argument, so the cache
// looks process-wide rather than per-model -- confirm that calling it
// from one model's teardown is safe while another model is still live.
void t3_release_caches();

// On a degenerate logits distribution (everything -inf after the sampling
// cascade), returns `stop_token` so the caller's stop check fires cleanly
// instead of emitting a pseudo-random in-vocab id. Pass
Expand Down
Loading