Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions packages/tts-ggml/addon/src/js-interface/JSAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,9 @@ std::string readOptionalString(
return v.value_or(std::string{});
}

bool readOptionalBool(
js::Object obj, js_env_t* env, const char* key, bool fallback = false) {
auto b = obj.getOptionalPropertyAs<js::Boolean, bool>(env, key);
return b.value_or(fallback);
/**
 * Reads the boolean property `key` from `obj` without substituting a default.
 *
 * The result of js::Object::getOptionalPropertyAs is handed back unchanged,
 * so the caller decides how to treat a missing value.
 * NOTE(review): presumably the optional is empty when the property is absent
 * on the JS side - confirm against getOptionalPropertyAs.
 *
 * @param obj JS object to read from.
 * @param env JS environment handle forwarded to the property accessor.
 * @param key Name of the property to look up.
 * @return The optional boolean produced by the accessor.
 */
std::optional<bool> readOptionalBool(
    js::Object obj, js_env_t* env, const char* key) {
  auto maybeFlag = obj.getOptionalPropertyAs<js::Boolean, bool>(env, key);
  return maybeFlag;
}

}
Expand Down Expand Up @@ -118,15 +117,11 @@ chatterbox::ChatterboxConfig JSAdapter::buildChatterboxConfig(
cfg.streamChunkTokens = readOptionalInt(configurationParams, env, "streamChunkTokens");
cfg.streamFirstChunkTokens = readOptionalInt(configurationParams, env, "streamFirstChunkTokens");
cfg.streamCfmSteps = readOptionalInt(configurationParams, env, "cfmSteps");
// The JS layer is the source of truth for useGPU: index.js only sets
// params.useGPU when _config.useGPU is non-null, so absence here means
// "not specified". readOptionalBool collapses that to `false`, which
// is harmless today because nGpuLayers (also forwarded above) takes
// precedence in toEngineOptions when both are set. If a future caller
// ever needs to distinguish "explicitly false" from "unset" on the C++
// side, switch this to std::optional<bool> + a readOptionalBoolOpt
// helper; today the explicit nGpuLayers always wins, so leave the
// implicit-false default in place.
// useGPU is tri-state on the C++ side: std::nullopt means "unspecified"
// (let the engine pick its default); true/false are explicit user
// intent. ChatterboxModel::validateConfig rejects useGPU/nGpuLayers
// conflicts, and toEngineOptions translates explicit-false into
// n_gpu_layers=0 so CPU is actually forced.
cfg.useGpu = readOptionalBool(configurationParams, env, "useGPU");
return cfg;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,17 @@ struct ChatterboxConfig {
std::optional<int> nGpuLayers;
/** Post-processing output sample rate. Currently unused (engine always emits 24 kHz). */
std::optional<int> outputSampleRate;
/** Shortcut: if true and nGpuLayers unset, maps to nGpuLayers=99. */
bool useGpu = false;
/**
* Tri-state GPU intent:
* - std::nullopt: unspecified, let the engine use its library default.
* - true: if nGpuLayers unset, maps to nGpuLayers=99.
* - false: if nGpuLayers unset, forces nGpuLayers=0 (CPU).
*
* Conflicts with nGpuLayers (true + 0, or false + !=0) are rejected
* by ChatterboxModel::validateConfig so callers can't silently get
* the opposite backend they asked for.
*/
std::optional<bool> useGpu;
/**
* Native streaming controls. When `streamChunkTokens > 0` and the
* caller passes a chunk callback on the job input, the engine runs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@ tts_cpp::chatterbox::EngineOptions toEngineOptions(const ChatterboxConfig& cfg)
if (cfg.threads.has_value()) opts.n_threads = *cfg.threads;
if (cfg.nGpuLayers.has_value()) {
opts.n_gpu_layers = *cfg.nGpuLayers;
} else if (cfg.useGpu) {
opts.n_gpu_layers = 99;
} else if (cfg.useGpu.has_value()) {
// Explicit useGpu must produce an explicit n_gpu_layers so we don't
// depend on the tts-cpp library default flipping out from under us
// (see also: gpu-smoke.test.js asserts backendDevice from this).
opts.n_gpu_layers = *cfg.useGpu ? 99 : 0;
}
if (cfg.streamChunkTokens.has_value()) opts.stream_chunk_tokens = *cfg.streamChunkTokens;
if (cfg.streamFirstChunkTokens.has_value()) opts.stream_first_chunk_tokens = *cfg.streamFirstChunkTokens;
Expand Down Expand Up @@ -79,6 +82,23 @@ ChatterboxModel::ChatterboxModel(ChatterboxConfig config)
ChatterboxModel::~ChatterboxModel() noexcept = default;

void ChatterboxModel::validateConfig(const ChatterboxConfig& cfg) {
if (cfg.useGpu.has_value() && cfg.nGpuLayers.has_value()) {
const bool wantsGpu = *cfg.useGpu;
const int layers = *cfg.nGpuLayers;
// `layers != 0` (rather than `layers > 0`) so a llama.cpp-style
// sentinel like nGpuLayers=-1 ("offload all layers") is treated as
// "wants GPU" and doesn't falsely pass through against useGPU:true.
const bool layersWantGpu = layers != 0;
if (wantsGpu != layersWantGpu) {
throw StatusError(
general_error::InvalidArgument,
std::string("ChatterboxModel: useGPU=") +
(wantsGpu ? "true" : "false") +
" conflicts with nGpuLayers=" + std::to_string(layers) +
". Either drop one of the two, or make them agree "
"(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).");
}
}
if (cfg.t3ModelPath.empty()) {
throw StatusError(general_error::InvalidArgument, "t3ModelPath is required");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,21 @@ struct SupertonicConfig {
std::optional<int> threads;
std::optional<int> nGpuLayers;
std::optional<int> outputSampleRate;
bool useGpu = false;
/**
* Tri-state GPU intent (mirrors ChatterboxConfig::useGpu):
* - std::nullopt: unspecified, let the engine use its library default.
* - true: if nGpuLayers unset, maps to nGpuLayers=99.
* Note: SupertonicModel::validateConfig still rejects
* any GPU intent today because the Supertonic
* engine is CPU-only ("CPU only today" — see
* tts-cpp include/tts-cpp/supertonic/engine.h).
* - false: if nGpuLayers unset, forces nGpuLayers=0 (CPU).
*
* Conflicts with nGpuLayers (true + 0, or false + !=0) are rejected
* by validateConfig so callers can't silently get the opposite
* backend they asked for.
*/
std::optional<bool> useGpu;
std::string noiseNpyPath;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ tts_cpp::supertonic::EngineOptions toEngineOptions(const SupertonicConfig& cfg)
if (cfg.threads.has_value()) opts.n_threads = *cfg.threads;
if (cfg.nGpuLayers.has_value()) {
opts.n_gpu_layers = *cfg.nGpuLayers;
} else if (cfg.useGpu) {
opts.n_gpu_layers = 99;
} else if (cfg.useGpu.has_value()) {
opts.n_gpu_layers = *cfg.useGpu ? 99 : 0;
}
opts.noise_npy_path = cfg.noiseNpyPath;
return opts;
Expand Down Expand Up @@ -87,8 +87,29 @@ void SupertonicModel::validateConfig(const SupertonicConfig& cfg) {
throw createTTSError(TTSErrorCode::ModelFileNotFound,
"noise npy not found: " + cfg.noiseNpyPath);
}
// Defense-in-depth: the JS layer (index.js::_validateConfig) runs the
// same conflict check before this method is reached, so direct C++
// callers are the only ones who can actually trip this branch.
// Mirror the Chatterbox suffix verbatim so users see an identical
// hint regardless of which engine they instantiated. `layers != 0`
// matches llama.cpp's "-1 = offload all" sentinel convention.
if (cfg.useGpu.has_value() && cfg.nGpuLayers.has_value()) {
const bool wantsGpuFlag = *cfg.useGpu;
const int layers = *cfg.nGpuLayers;
const bool layersWantGpu = layers != 0;
if (wantsGpuFlag != layersWantGpu) {
throw StatusError(
general_error::InvalidArgument,
std::string("SupertonicModel: useGPU=") +
(wantsGpuFlag ? "true" : "false") +
" conflicts with nGpuLayers=" + std::to_string(layers) +
". Either drop one of the two, or make them agree "
"(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).");
}
}
const bool wantsGpu =
cfg.useGpu || (cfg.nGpuLayers.has_value() && *cfg.nGpuLayers > 0);
cfg.useGpu.value_or(false) ||
(cfg.nGpuLayers.has_value() && *cfg.nGpuLayers != 0);
if (wantsGpu) {
throw StatusError(
general_error::InvalidArgument,
Expand Down
2 changes: 1 addition & 1 deletion packages/tts-ggml/addon/tests/test_chatterbox_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ TEST(ChatterboxValidate, ConfigDefaultLanguageIsEnglish) {

TEST(ChatterboxValidate, ConfigUseGpuDefaultIsFalse) {
ChatterboxConfig cfg;
EXPECT_FALSE(cfg.useGpu);
EXPECT_FALSE(cfg.useGpu.has_value());
EXPECT_FALSE(cfg.seed.has_value());
EXPECT_FALSE(cfg.threads.has_value());
EXPECT_FALSE(cfg.nGpuLayers.has_value());
Expand Down
2 changes: 1 addition & 1 deletion packages/tts-ggml/addon/tests/test_supertonic_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ TEST(SupertonicValidate, WaitForLoadInitializationDelegatesToLoad) {
TEST(SupertonicValidate, ConfigDefaultsAreCpuFriendly) {
SupertonicConfig cfg;
EXPECT_EQ(cfg.language, "en");
EXPECT_FALSE(cfg.useGpu);
EXPECT_FALSE(cfg.useGpu.has_value());
EXPECT_FALSE(cfg.nGpuLayers.has_value());
EXPECT_FALSE(cfg.steps.has_value());
EXPECT_FALSE(cfg.speed.has_value());
Expand Down
24 changes: 23 additions & 1 deletion packages/tts-ggml/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,28 @@ class TTSGgml {
this._speed = speed
this._noiseNpyPath = noiseNpyPath

// Run the conflict check before any engine-specific GPU policy so a
// caller passing { useGPU:false, nGpuLayers:99 } gets the precise
// conflict message instead of, e.g., the Supertonic "GPU not
// supported" branch firing on `nGpuLayers > 0` and confusing them.
// `layers != 0` (rather than `layers > 0`) so a future llama.cpp-
// style `nGpuLayers: -1` ("offload all layers") doesn't falsely
// pass through as "wants CPU" against an explicit useGPU:true.
if (
typeof this._config.useGPU === 'boolean' &&
this._nGpuLayers != null
) {
const layersWantGpu = this._nGpuLayers !== 0
if (this._config.useGPU !== layersWantGpu) {
throw new Error(
'tts-ggml: useGPU=' + this._config.useGPU +
' conflicts with nGpuLayers=' + this._nGpuLayers + '. ' +
'Either drop one of the two, or make them agree ' +
'(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).'
)
}
}

if (this._engineType === ENGINE_SUPERTONIC) {
if (this._streamChunkTokens != null || this._streamFirstChunkTokens != null) {
throw new Error(
Expand All @@ -304,7 +326,7 @@ class TTSGgml {
}
const wantsGpu =
this._config.useGPU === true ||
(this._nGpuLayers != null && this._nGpuLayers > 0)
(this._nGpuLayers != null && this._nGpuLayers !== 0)
if (wantsGpu) {
throw new Error(
'tts-ggml: GPU execution is not supported by the Supertonic engine yet ' +
Expand Down
99 changes: 98 additions & 1 deletion packages/tts-ggml/test/integration/gpu-smoke.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ const proc = require('bare-process')
const test = require('brittle')

const { loadChatterboxTTS, runChatterboxTTS } = require('../utils/runChatterboxTTS')
const { ensureChatterboxModels } = require('../utils/downloadModel')
const { loadSupertonicTTS, runSupertonicTTS } = require('../utils/runSupertonicTTS')
const { ensureChatterboxModels, ensureSupertonicModel } = require('../utils/downloadModel')

const platform = os.platform()
const isMobile = platform === 'ios' || platform === 'android'
Expand Down Expand Up @@ -104,6 +105,23 @@ function assertGpuBackend (t, engineTag, stats) {
}
}

// CPU-side counterpart of assertGpuBackend. A caller that passed
// useGPU=false expects the run to have used the CPU backend, so this
// checks that both stats.backendDevice and stats.backendId are 0. It is
// the guard against `useGPU=false` silently still running on GPU when
// the underlying tts-cpp library default is a non-zero n_gpu_layers.
//
//   t         - test handle (uses t.fail and t.is)
//   engineTag - engine label used as a prefix in log and assertion messages
//   stats     - response.stats from the run; a falsy value fails the test
function assertCpuBackend (t, engineTag, stats) {
  if (stats) {
    const { backendDevice, backendId } = stats
    const backendName = backendIdToName(backendId)
    console.log(`[${engineTag}/CPU] backendDevice=${backendDevice} backendId=${backendId} (${backendName})`)
    t.is(backendDevice, 0, `${engineTag}: useGPU:false must resolve to backendDevice=0 (CPU), got ${backendName}`)
    t.is(backendId, 0, `${engineTag}: useGPU:false must resolve to backendId=0 (CPU), got ${backendName}`)
    return
  }
  // Without stats there is nothing to inspect, so report that explicitly.
  t.fail(`${engineTag}/CPU: no response.stats returned (cannot verify backend)`)
}

test('Chatterbox GPU smoke - useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => {
const baseDir = getBaseDir()
const modelsDir = path.join(baseDir, 'models')
Expand Down Expand Up @@ -161,3 +179,82 @@ test('Supertonic GPU smoke - useGPU=true is rejected at constructor (engine is C
}
t.ok(threw, 'TTSGgml constructor should throw on Supertonic + useGPU:true')
})

// CPU smoke: useGPU:false must actually pin the engine to CPU on every
// platform (no NO_GPU skip — CPU is expected to work everywhere). This
// is the counterpart to the GPU smoke above and exists because the
// previous tts-ggml behaviour left n_gpu_layers at the tts-cpp library
// default when useGPU:false was passed without an explicit nGpuLayers,
// which could silently fall back to GPU. Now that ChatterboxModel /
// SupertonicModel translate explicit useGPU=false → n_gpu_layers=0,
// these tests lock that contract in.
// Loads Chatterbox with useGPU:false, synthesizes a short phrase, and checks
// that audio was produced and that the reported backend is the CPU one.
// Reports a pass and returns early when the models or the reference wav are
// not available.
test('Chatterbox CPU smoke - useGPU=false must run on the CPU backend', { timeout: 600000 }, async (t) => {
  const modelsDir = path.join(getBaseDir(), 'models')

  const download = await ensureChatterboxModels({ targetDir: modelsDir })
  if (!download.success) {
    t.pass('Skipped: Chatterbox GGUFs not available locally')
    return
  }

  const refWavPath = path.join(__dirname, '..', 'reference-audio', 'jfk.wav')
  const hasReferenceAudio = fs.existsSync(refWavPath)
  if (!hasReferenceAudio) {
    t.pass('Skipped: reference audio missing')
    return
  }

  const loadOptions = {
    modelDir: download.targetDir,
    refWavPath,
    language: 'en',
    useGPU: false
  }
  const model = await loadChatterboxTTS(loadOptions)

  try {
    const { output, passed, data } = await runChatterboxTTS(
      model,
      { text: 'CPU smoke check.' },
      { minSamples: 5000 }
    )
    console.log(output)
    t.ok(passed, 'Chatterbox/CPU produced expected sample count')
    t.ok(data.sampleCount > 0, 'Chatterbox/CPU produced audio')
    assertCpuBackend(t, 'Chatterbox', data.stats)
  } finally {
    // Best-effort cleanup: an unload failure must not mask the test result.
    try {
      await model.unload()
    } catch (_e) {}
  }
})

// Loads Supertonic with useGPU:false, synthesizes a short phrase, and checks
// that audio was produced and that the reported backend is the CPU one.
// Reports a pass and returns early when the model is not available.
test('Supertonic CPU smoke - useGPU=false must run on the CPU backend', { timeout: 600000 }, async (t) => {
  const modelsDir = path.join(getBaseDir(), 'models')

  const download = await ensureSupertonicModel({ targetDir: modelsDir })
  const modelAvailable = download && download.success
  if (!modelAvailable) {
    t.pass('Skipped: Supertonic GGUF not available locally')
    return
  }

  // Prefer the path reported by the downloader; otherwise use the
  // conventional file name inside the models directory.
  const supertonicPath = download.path ||
    path.join(modelsDir, 'supertonic.gguf')

  const loadOptions = {
    supertonicModelPath: supertonicPath,
    language: 'en',
    voice: 'F1',
    useGPU: false
  }
  const model = await loadSupertonicTTS(loadOptions)

  try {
    const { output, passed, data } = await runSupertonicTTS(
      model,
      { text: 'CPU smoke check.' },
      { minSamples: 5000 }
    )
    console.log(output)
    t.ok(passed, 'Supertonic/CPU produced expected sample count')
    t.ok(data.sampleCount > 0, 'Supertonic/CPU produced audio')
    assertCpuBackend(t, 'Supertonic', data.stats)
  } finally {
    // Best-effort cleanup: an unload failure must not mask the test result.
    try {
      await model.unload()
    } catch (_e) {}
  }
})
Loading