tetherto · ishanvohra2 · May 11, 2026 · May 8, 2026 · May 8, 2026 · May 11, 2026
diff --git a/packages/tts-ggml/addon/src/js-interface/JSAdapter.cpp b/packages/tts-ggml/addon/src/js-interface/JSAdapter.cpp
@@ -68,10 +68,9 @@ std::string readOptionalString(
   return v.value_or(std::string{});
 }
 
-bool readOptionalBool(
-    js::Object obj, js_env_t* env, const char* key, bool fallback = false) {
-  auto b = obj.getOptionalPropertyAs<js::Boolean, bool>(env, key);
-  return b.value_or(fallback);
+std::optional<bool> readOptionalBool(
+    js::Object obj, js_env_t* env, const char* key) {
+  return obj.getOptionalPropertyAs<js::Boolean, bool>(env, key);
 }
 
 }
@@ -118,15 +117,11 @@ chatterbox::ChatterboxConfig JSAdapter::buildChatterboxConfig(
   cfg.streamChunkTokens       = readOptionalInt(configurationParams, env, "streamChunkTokens");
   cfg.streamFirstChunkTokens  = readOptionalInt(configurationParams, env, "streamFirstChunkTokens");
   cfg.streamCfmSteps          = readOptionalInt(configurationParams, env, "cfmSteps");
-  // The JS layer is the source of truth for useGPU: index.js only sets
-  // params.useGPU when _config.useGPU is non-null, so absence here means
-  // "not specified".  readOptionalBool collapses that to `false`, which
-  // is harmless today because nGpuLayers (also forwarded above) takes
-  // precedence in toEngineOptions when both are set.  If a future caller
-  // ever needs to distinguish "explicitly false" from "unset" on the C++
-  // side, switch this to std::optional<bool> + a readOptionalBoolOpt
-  // helper; today the explicit nGpuLayers always wins, so leave the
-  // implicit-false default in place.
+  // useGPU is tri-state on the C++ side: std::nullopt means "unspecified"
+  // (let the engine pick its default); true/false are explicit user
+  // intent.  ChatterboxModel::validateConfig rejects useGPU/nGpuLayers
+  // conflicts, and toEngineOptions translates explicit-false into
+  // n_gpu_layers=0 so CPU is actually forced.
   cfg.useGpu                  = readOptionalBool(configurationParams, env, "useGPU");
   return cfg;
 }

diff --git a/packages/tts-ggml/addon/src/model-interface/chatterbox/ChatterboxConfig.hpp b/packages/tts-ggml/addon/src/model-interface/chatterbox/ChatterboxConfig.hpp
@@ -33,8 +33,17 @@ struct ChatterboxConfig {
   std::optional<int> nGpuLayers;
   /** Post-processing output sample rate.  Currently unused (engine always emits 24 kHz). */
   std::optional<int> outputSampleRate;
-  /** Shortcut: if true and nGpuLayers unset, maps to nGpuLayers=99. */
-  bool useGpu = false;
+  /**
+   * Tri-state GPU intent:
+   *   - std::nullopt: unspecified, let the engine use its library default.
+   *   - true:         if nGpuLayers unset, maps to nGpuLayers=99.
+   *   - false:        if nGpuLayers unset, forces nGpuLayers=0 (CPU).
+   *
+   * Conflicts with nGpuLayers (true + 0, or false + !=0) are rejected
+   * by ChatterboxModel::validateConfig so callers can't silently get
+   * the opposite backend they asked for.
+   */
+  std::optional<bool> useGpu;
   /**
    * Native streaming controls.  When `streamChunkTokens > 0` and the
    * caller passes a chunk callback on the job input, the engine runs

diff --git a/packages/tts-ggml/addon/src/model-interface/chatterbox/ChatterboxModel.cpp b/packages/tts-ggml/addon/src/model-interface/chatterbox/ChatterboxModel.cpp
@@ -36,8 +36,11 @@ tts_cpp::chatterbox::EngineOptions toEngineOptions(const ChatterboxConfig& cfg)
   if (cfg.threads.has_value()) opts.n_threads    = *cfg.threads;
   if (cfg.nGpuLayers.has_value()) {
     opts.n_gpu_layers = *cfg.nGpuLayers;
-  } else if (cfg.useGpu) {
-    opts.n_gpu_layers = 99;
+  } else if (cfg.useGpu.has_value()) {
+    // Explicit useGpu must produce an explicit n_gpu_layers so we don't
+    // depend on the tts-cpp library default flipping out from under us
+    // (see also: gpu-smoke.test.js asserts backendDevice from this).
+    opts.n_gpu_layers = *cfg.useGpu ? 99 : 0;
   }
   if (cfg.streamChunkTokens.has_value())      opts.stream_chunk_tokens       = *cfg.streamChunkTokens;
   if (cfg.streamFirstChunkTokens.has_value()) opts.stream_first_chunk_tokens = *cfg.streamFirstChunkTokens;
@@ -79,6 +82,23 @@ ChatterboxModel::ChatterboxModel(ChatterboxConfig config)
 ChatterboxModel::~ChatterboxModel() noexcept = default;
 
 void ChatterboxModel::validateConfig(const ChatterboxConfig& cfg) {
+  if (cfg.useGpu.has_value() && cfg.nGpuLayers.has_value()) {
+    const bool wantsGpu = *cfg.useGpu;
+    const int  layers   = *cfg.nGpuLayers;
+    // `layers != 0` (rather than `layers > 0`) so a llama.cpp-style
+    // sentinel like nGpuLayers=-1 ("offload all layers") counts as
+    // "wants GPU": it conflicts with useGPU:false, not useGPU:true.
+    const bool layersWantGpu = layers != 0;
+    if (wantsGpu != layersWantGpu) {
+      throw StatusError(
+          general_error::InvalidArgument,
+          std::string("ChatterboxModel: useGPU=") +
+              (wantsGpu ? "true" : "false") +
+              " conflicts with nGpuLayers=" + std::to_string(layers) +
+              ". Either drop one of the two, or make them agree "
+              "(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).");
+    }
+  }
   if (cfg.t3ModelPath.empty()) {
     throw StatusError(general_error::InvalidArgument, "t3ModelPath is required");
   }

diff --git a/packages/tts-ggml/addon/src/model-interface/supertonic/SupertonicConfig.hpp b/packages/tts-ggml/addon/src/model-interface/supertonic/SupertonicConfig.hpp
@@ -15,7 +15,21 @@ struct SupertonicConfig {
   std::optional<int> threads;
   std::optional<int> nGpuLayers;
   std::optional<int> outputSampleRate;
-  bool useGpu = false;
+  /**
+   * Tri-state GPU intent (mirrors ChatterboxConfig::useGpu):
+   *   - std::nullopt: unspecified, let the engine use its library default.
+   *   - true:         if nGpuLayers unset, maps to nGpuLayers=99.
+   *                   Note: SupertonicModel::validateConfig still rejects
+   *                   any GPU intent today because the Supertonic
+   *                   engine is CPU-only ("CPU only today" — see
+   *                   tts-cpp include/tts-cpp/supertonic/engine.h).
+   *   - false:        if nGpuLayers unset, forces nGpuLayers=0 (CPU).
+   *
+   * Conflicts with nGpuLayers (true + 0, or false + !=0) are rejected
+   * by validateConfig so callers can't silently get the opposite
+   * backend they asked for.
+   */
+  std::optional<bool> useGpu;
   std::string noiseNpyPath;
 };
 

diff --git a/packages/tts-ggml/addon/src/model-interface/supertonic/SupertonicModel.cpp b/packages/tts-ggml/addon/src/model-interface/supertonic/SupertonicModel.cpp
@@ -36,8 +36,8 @@ tts_cpp::supertonic::EngineOptions toEngineOptions(const SupertonicConfig& cfg)
   if (cfg.threads.has_value()) opts.n_threads = *cfg.threads;
   if (cfg.nGpuLayers.has_value()) {
     opts.n_gpu_layers = *cfg.nGpuLayers;
-  } else if (cfg.useGpu) {
-    opts.n_gpu_layers = 99;
+  } else if (cfg.useGpu.has_value()) {
+    opts.n_gpu_layers = *cfg.useGpu ? 99 : 0;
   }
   opts.noise_npy_path = cfg.noiseNpyPath;
   return opts;
@@ -87,8 +87,29 @@ void SupertonicModel::validateConfig(const SupertonicConfig& cfg) {
     throw createTTSError(TTSErrorCode::ModelFileNotFound,
                          "noise npy not found: " + cfg.noiseNpyPath);
   }
+  // Defense-in-depth: the JS layer (index.js::_validateConfig) runs the
+  // same conflict check before this method is reached, so direct C++
+  // callers are the only ones who can actually trip this branch.
+  // Mirror the Chatterbox suffix verbatim so users see an identical
+  // hint regardless of which engine they instantiated.  `layers != 0`
+  // matches llama.cpp's "-1 = offload all" sentinel convention.
+  if (cfg.useGpu.has_value() && cfg.nGpuLayers.has_value()) {
+    const bool wantsGpuFlag   = *cfg.useGpu;
+    const int  layers         = *cfg.nGpuLayers;
+    const bool layersWantGpu  = layers != 0;
+    if (wantsGpuFlag != layersWantGpu) {
+      throw StatusError(
+          general_error::InvalidArgument,
+          std::string("SupertonicModel: useGPU=") +
+              (wantsGpuFlag ? "true" : "false") +
+              " conflicts with nGpuLayers=" + std::to_string(layers) +
+              ". Either drop one of the two, or make them agree "
+              "(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).");
+    }
+  }
   const bool wantsGpu =
-      cfg.useGpu || (cfg.nGpuLayers.has_value() && *cfg.nGpuLayers > 0);
+      cfg.useGpu.value_or(false) ||
+      (cfg.nGpuLayers.has_value() && *cfg.nGpuLayers != 0);
   if (wantsGpu) {
     throw StatusError(
         general_error::InvalidArgument,

diff --git a/packages/tts-ggml/addon/tests/test_chatterbox_config.cpp b/packages/tts-ggml/addon/tests/test_chatterbox_config.cpp
@@ -142,7 +142,7 @@ TEST(ChatterboxValidate, ConfigDefaultLanguageIsEnglish) {
 
 TEST(ChatterboxValidate, ConfigUseGpuDefaultIsFalse) {
   ChatterboxConfig cfg;
-  EXPECT_FALSE(cfg.useGpu);
+  EXPECT_FALSE(cfg.useGpu.has_value());
   EXPECT_FALSE(cfg.seed.has_value());
   EXPECT_FALSE(cfg.threads.has_value());
   EXPECT_FALSE(cfg.nGpuLayers.has_value());

diff --git a/packages/tts-ggml/addon/tests/test_supertonic_config.cpp b/packages/tts-ggml/addon/tests/test_supertonic_config.cpp
@@ -137,7 +137,7 @@ TEST(SupertonicValidate, WaitForLoadInitializationDelegatesToLoad) {
 TEST(SupertonicValidate, ConfigDefaultsAreCpuFriendly) {
   SupertonicConfig cfg;
   EXPECT_EQ(cfg.language, "en");
-  EXPECT_FALSE(cfg.useGpu);
+  EXPECT_FALSE(cfg.useGpu.has_value());
   EXPECT_FALSE(cfg.nGpuLayers.has_value());
   EXPECT_FALSE(cfg.steps.has_value());
   EXPECT_FALSE(cfg.speed.has_value());

diff --git a/packages/tts-ggml/index.js b/packages/tts-ggml/index.js
@@ -292,6 +292,28 @@ class TTSGgml {
     this._speed = speed
     this._noiseNpyPath = noiseNpyPath
 
+    // Run the conflict check before any engine-specific GPU policy so a
+    // caller passing { useGPU:false, nGpuLayers:99 } gets the precise
+    // conflict message instead of, e.g., the Supertonic "GPU not
+    // supported" branch firing on `nGpuLayers !== 0` and confusing them.
+    // `layers != 0` (rather than `layers > 0`) so a future llama.cpp-
+    // style `nGpuLayers: -1` ("offload all layers") counts as "wants
+    // GPU": it conflicts with useGPU:false, not with useGPU:true.
+    if (
+      typeof this._config.useGPU === 'boolean' &&
+      this._nGpuLayers != null
+    ) {
+      const layersWantGpu = this._nGpuLayers !== 0
+      if (this._config.useGPU !== layersWantGpu) {
+        throw new Error(
+          'tts-ggml: useGPU=' + this._config.useGPU +
+          ' conflicts with nGpuLayers=' + this._nGpuLayers + '. ' +
+          'Either drop one of the two, or make them agree ' +
+          '(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).'
+        )
+      }
+    }
+
     if (this._engineType === ENGINE_SUPERTONIC) {
       if (this._streamChunkTokens != null || this._streamFirstChunkTokens != null) {
         throw new Error(
@@ -304,7 +326,7 @@ class TTSGgml {
       }
       const wantsGpu =
         this._config.useGPU === true ||
-        (this._nGpuLayers != null && this._nGpuLayers > 0)
+        (this._nGpuLayers != null && this._nGpuLayers !== 0)
       if (wantsGpu) {
         throw new Error(
           'tts-ggml: GPU execution is not supported by the Supertonic engine yet ' +

diff --git a/packages/tts-ggml/test/integration/gpu-smoke.test.js b/packages/tts-ggml/test/integration/gpu-smoke.test.js
@@ -29,7 +29,8 @@ const proc = require('bare-process')
 const test = require('brittle')
 
 const { loadChatterboxTTS, runChatterboxTTS } = require('../utils/runChatterboxTTS')
-const { ensureChatterboxModels } = require('../utils/downloadModel')
+const { loadSupertonicTTS, runSupertonicTTS } = require('../utils/runSupertonicTTS')
+const { ensureChatterboxModels, ensureSupertonicModel } = require('../utils/downloadModel')
 
 const platform = os.platform()
 const isMobile = platform === 'ios' || platform === 'android'
@@ -104,6 +105,23 @@ function assertGpuBackend (t, engineTag, stats) {
   }
 }
 
+// Companion to assertGpuBackend: when the caller passes useGPU=false we
+// expect the engine to actually pick the CPU backend.  This is the gate
+// that prevents `useGPU=false` from silently still running on GPU when
+// the underlying tts-cpp library default is non-zero n_gpu_layers.
+function assertCpuBackend (t, engineTag, stats) {
+  if (!stats) {
+    t.fail(`${engineTag}/CPU: no response.stats returned (cannot verify backend)`)
+    return
+  }
+  const dev = stats.backendDevice
+  const id = stats.backendId
+  const name = backendIdToName(id)
+  console.log(`[${engineTag}/CPU] backendDevice=${dev} backendId=${id} (${name})`)
+  t.is(dev, 0, `${engineTag}: useGPU:false must resolve to backendDevice=0 (CPU), got ${name}`)
+  t.is(id, 0, `${engineTag}: useGPU:false must resolve to backendId=0 (CPU), got ${name}`)
+}
+
 test('Chatterbox GPU smoke - useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => {
   const baseDir = getBaseDir()
   const modelsDir = path.join(baseDir, 'models')
@@ -161,3 +179,82 @@ test('Supertonic GPU smoke - useGPU=true is rejected at constructor (engine is C
   }
   t.ok(threw, 'TTSGgml constructor should throw on Supertonic + useGPU:true')
 })
+
+// CPU smoke: useGPU:false must actually pin the engine to CPU on every
+// platform (no NO_GPU skip — CPU is expected to work everywhere).  This
+// is the counterpart to the GPU smoke above and exists because the
+// previous tts-ggml behaviour left n_gpu_layers at the tts-cpp library
+// default when useGPU:false was passed without an explicit nGpuLayers,
+// which could silently fall back to GPU.  Now that ChatterboxModel /
+// SupertonicModel translate explicit useGPU=false → n_gpu_layers=0,
+// these tests lock that contract in.
+test('Chatterbox CPU smoke - useGPU=false must run on the CPU backend', { timeout: 600000 }, async (t) => {
+  const baseDir = getBaseDir()
+  const modelsDir = path.join(baseDir, 'models')
+
+  const download = await ensureChatterboxModels({ targetDir: modelsDir })
+  if (!download.success) {
+    t.pass('Skipped: Chatterbox GGUFs not available locally')
+    return
+  }
+
+  const refWavPath = path.join(__dirname, '..', 'reference-audio', 'jfk.wav')
+  if (!fs.existsSync(refWavPath)) {
+    t.pass('Skipped: reference audio missing')
+    return
+  }
+
+  const model = await loadChatterboxTTS({
+    modelDir: download.targetDir,
+    refWavPath,
+    language: 'en',
+    useGPU: false
+  })
+  try {
+    const result = await runChatterboxTTS(
+      model,
+      { text: 'CPU smoke check.' },
+      { minSamples: 5000 }
+    )
+    console.log(result.output)
+    t.ok(result.passed, 'Chatterbox/CPU produced expected sample count')
+    t.ok(result.data.sampleCount > 0, 'Chatterbox/CPU produced audio')
+    assertCpuBackend(t, 'Chatterbox', result.data.stats)
+  } finally {
+    try { await model.unload() } catch (_e) {}
+  }
+})
+
+test('Supertonic CPU smoke - useGPU=false must run on the CPU backend', { timeout: 600000 }, async (t) => {
+  const baseDir = getBaseDir()
+  const modelsDir = path.join(baseDir, 'models')
+
+  const download = await ensureSupertonicModel({ targetDir: modelsDir })
+  if (!download || !download.success) {
+    t.pass('Skipped: Supertonic GGUF not available locally')
+    return
+  }
+
+  const supertonicPath = download.path ||
+    path.join(modelsDir, 'supertonic.gguf')
+
+  const model = await loadSupertonicTTS({
+    supertonicModelPath: supertonicPath,
+    language: 'en',
+    voice: 'F1',
+    useGPU: false
+  })
+  try {
+    const result = await runSupertonicTTS(
+      model,
+      { text: 'CPU smoke check.' },
+      { minSamples: 5000 }
+    )
+    console.log(result.output)
+    t.ok(result.passed, 'Supertonic/CPU produced expected sample count')
+    t.ok(result.data.sampleCount > 0, 'Supertonic/CPU produced audio')
+    assertCpuBackend(t, 'Supertonic', result.data.stats)
+  } finally {
+    try { await model.unload() } catch (_e) {}
+  }
+})