tetherto · freddy311082 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
@@ -5,6 +5,36 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.1] - 2026-06-05
+
+### Added
+
+- **Supertonic now supports GPU execution.** Consumes `tts-cpp`
+  `2026-06-05`, which brings the QVAC-18605 Supertonic Vulkan/Metal
+  optimisations (rounds 1-13, ~34× realtime on Apple M-series Metal)
+  and the QVAC-19254 sched + cpu_backend refactor for Adreno OpenCL.
+  Caller intent (`useGPU` / `nGpuLayers`) is now honoured for Supertonic
+  the same way it is for Chatterbox; backend selection follows
+  tts-cpp's `init_gpu_backend` tier policy (Adreno 700+ → OpenCL,
+  otherwise Vulkan/Metal/CUDA via the registry walk, otherwise CPU).
+
+### Changed
+
+- Removed the validateConfig hard-throw on `useGPU=true` /
+  `nGpuLayers != 0` for Supertonic in both `SupertonicModel.cpp` and
+  `index.js`. The conflicting-pair check (`useGPU=true` + `nGpuLayers=0`
+  or vice versa) is preserved.
+- Removed the Android force-off block in `SupertonicModel::loadLocked`.
+  Android GPU selection is delegated to tts-cpp's `init_gpu_backend`
+  tier policy (Qualcomm Adreno allowlist; Mali / non-Adreno skipped).
+- Flipped the C++ unit tests that previously expected GPU rejection
+  (`test_supertonic_config.cpp::UseGpuTrueRejectedWithExplanation`,
+  `NGpuLayersGreaterThanZeroRejected`) into acceptance tests; added a
+  new test asserting the cross-field conflict check is still enforced.
+- Flipped the Supertonic entry in `test/integration/gpu-smoke.test.js`
+  from "rejected at constructor" to "must engage GPU backend on
+  GPU-capable platforms", mirroring the Chatterbox smoke contract.
+
 ## [0.2.0] - 2026-06-02
 
 ### Changed

@@ -250,7 +250,7 @@ backend persist its compiled program cache across launches.
 | `backendsDir`             | string     | `path.join(__dirname, 'prebuilds')` | Root dir the addon scans for dynamically-loaded ggml backend `.so` files.  Required on Android (host should pass `path.join(__dirname, 'prebuilds')`); ignored on platforms that statically link the backend |
 | `openclCacheDir`          | string     | unset      | Android-only: directory where the OpenCL backend persists its compiled program-binary cache.  Setting it across runs avoids re-JITing the kernels on every fresh process |
 | `config.language`         | string     | `"en"`     | Chatterbox MTL accepts `es/fr/de/pt/it/zh/ja/ko/...`; turbo & Supertonic are English |
-| `config.useGPU`           | boolean    | `false`    | Set to `true` to route through Metal / Vulkan / OpenCL if available.  Ignored on Android (forced to CPU at the C++ engine boundary); rejected by Supertonic at construction time (engine is CPU-only today) |
+| `config.useGPU`           | boolean    | `false`    | Set to `true` to route through Metal / Vulkan / CUDA / OpenCL if available, on either Chatterbox or Supertonic.  Backend selection follows tts-cpp's `init_gpu_backend` tier policy (Adreno 700+ → OpenCL, otherwise Vulkan/Metal/CUDA via the registry walk, otherwise CPU) |
 | `config.outputSampleRate` | number     | 24000      | Resample native 24 kHz output |
 | `opts.stats`              | boolean    | `false`    | Populate `response.stats` with RTF, `backendDevice` (0=CPU, 1=GPU), `backendId` (0=CPU, 1=Metal, 3=Vulkan, 4=OpenCL, 99=other) etc. |
 | `opts.exclusiveRun`       | boolean    | `false`    | Serialize overlapping streaming runs |

@@ -19,10 +19,13 @@ struct SupertonicConfig {
    * Tri-state GPU intent (mirrors ChatterboxConfig::useGpu):
    *   - std::nullopt: unspecified, let the engine use its library default.
    *   - true:         if nGpuLayers unset, maps to nGpuLayers=99.
-   *                   Note: SupertonicModel::validateConfig still rejects
-   *                   any GPU intent today because the Supertonic
-   *                   engine is CPU-only ("CPU only today" — see
-   *                   tts-cpp include/tts-cpp/supertonic/engine.h).
+   *                   Honoured as of tts-cpp@2026-06-05 (QVAC-18605
+   *                   Supertonic Vulkan/Metal optimisations + QVAC-19254
+   *                   sched/cpu_backend refactor for Adreno OpenCL).
+   *                   Backend selection follows tts-cpp's init_gpu_backend
+   *                   tier policy (Adreno 700+ -> OpenCL, otherwise
+   *                   Vulkan/Metal/CUDA via the registry walk, otherwise
+   *                   CPU).
    *   - false:        if nGpuLayers unset, forces nGpuLayers=0 (CPU).
    *
    * Conflicts with nGpuLayers (true + 0, or false + !=0) are rejected

@@ -126,19 +126,12 @@ void SupertonicModel::validateConfig(const SupertonicConfig& cfg) {
               "(useGPU:true + nGpuLayers!=0, or useGPU:false + nGpuLayers=0).");
     }
   }
-  const bool wantsGpu =
-      cfg.useGpu.value_or(false) ||
-      (cfg.nGpuLayers.has_value() && *cfg.nGpuLayers != 0);
-  if (wantsGpu) {
-    throw StatusError(
-        general_error::InvalidArgument,
-        "SupertonicModel: GPU execution is not supported by the Supertonic "
-        "engine yet (see tts-cpp include/tts-cpp/supertonic/engine.h: \"CPU "
-        "only today\"). GPU output is currently silently wrong "
-        "(~4x quieter, slightly truncated) on the Vulkan vector-estimator "
-        "+ vocoder path. Pass useGPU: false (and leave nGpuLayers unset or "
-        "0) when constructing a Supertonic model.");
-  }
+  // GPU execution is supported as of tts-cpp@2026-06-05 (QVAC-18605
+  // Supertonic Vulkan/Metal optimisations + QVAC-19254 sched/cpu_backend
+  // refactor for Adreno OpenCL).  Backend selection follows tts-cpp's
+  // init_gpu_backend tier policy: Adreno 700+ -> OpenCL, otherwise
+  // Vulkan/Metal/CUDA via the registry walk, otherwise CPU.  Caller
+  // intent (useGPU / nGpuLayers) is honoured.
 }
 
 void SupertonicModel::load() {
@@ -160,23 +153,13 @@ void SupertonicModel::reload() {
 void SupertonicModel::loadLocked() {
   if (engine_) return;
 
-  // Force useGPU to false on Android until Vulkan (Mali) and OpenCL (Adreno)
-  // stabilize for the Supertonic graph.
-#ifdef __ANDROID__
-  {
-    const bool wantsGpu =
-        cfg_.useGpu.value_or(false) ||
-        (cfg_.nGpuLayers.has_value() && *cfg_.nGpuLayers != 0);
-    if (wantsGpu) {
-      QLOG(logger::Priority::WARNING,
-           "Supertonic: useGPU=true is currently ignored on Android "
-           "(GPU backends disabled at engine boundary pending Vulkan/Mali "
-           "and OpenCL/Adreno driver fixes); falling back to CPU.");
-    }
-    cfg_.useGpu     = false;
-    cfg_.nGpuLayers = 0;
-  }
-#endif
+  // Android GPU policy is delegated to tts-cpp's init_gpu_backend tier
+  // policy as of QVAC-19254: it allowlists Qualcomm Adreno (OpenCL on
+  // Adreno 700+, falls through to Vulkan / CPU on other tiers) and
+  // skips Mali / non-Adreno GPUs that would abort
+  // ggml_backend_graph_compute.  No extra force-off at this boundary;
+  // consumers asking for useGPU=true on Android will get Adreno-OpenCL
+  // when available and CPU otherwise.
 
   try {
     engine_ = std::make_shared<tts_cpp::supertonic::Engine>(toEngineOptions(cfg_));

@@ -80,50 +80,65 @@ TEST(SupertonicValidate, NonexistentNoiseNpyRejected) {
   EXPECT_THROW(SupertonicModel{cfg}, StatusError);
 }
 
-TEST(SupertonicValidate, UseGpuTrueRejectedWithExplanation) {
+TEST(SupertonicValidate, UseGpuTrueAcceptedAtConstruction) {
+  // QVAC-19255 (companion to PR-bump-to-tts-cpp-128dae42): Supertonic
+  // gained Vulkan/Metal GPU support in tts-cpp@2026-06-05 (QVAC-18605
+  // rounds 1-13 + QVAC-19254 sched). validateConfig must now ACCEPT
+  // useGPU=true at construction time. The stub GGUF file still fails
+  // parsing on load() (see NGpuLayersZeroAcceptedAndDeferredLoad), but
+  // construction itself no longer rejects on GPU intent.
   auto cfg = minimallyValidStubConfig();
   cfg.useGpu = true;
+  std::unique_ptr<SupertonicModel> m;
+  EXPECT_NO_THROW(m = std::make_unique<SupertonicModel>(cfg));
+  ASSERT_NE(m, nullptr);
+  EXPECT_FALSE(m->isLoaded());
+}
+
+TEST(SupertonicValidate, NGpuLayersGreaterThanZeroAccepted) {
+  // Companion to UseGpuTrueAcceptedAtConstruction: explicit
+  // nGpuLayers > 0 is no longer rejected at validation. Loading the
+  // stub will throw on GGUF parse, but the constructor must succeed.
+  auto cfg = minimallyValidStubConfig();
+  cfg.nGpuLayers = 99;
+  std::unique_ptr<SupertonicModel> m;
+  EXPECT_NO_THROW(m = std::make_unique<SupertonicModel>(cfg));
+  ASSERT_NE(m, nullptr);
+  EXPECT_FALSE(m->isLoaded());
+}
+
+TEST(SupertonicValidate, UseGpuNGpuLayersConflictStillRejected) {
+  // The cross-field conflict check is still enforced after the GPU gate
+  // was lifted, so callers can't silently get the opposite backend they
+  // asked for. Only useGPU=true + nGpuLayers=0 is exercised here; the
+  // mirrored useGPU=false + nGpuLayers!=0 case is not asserted.
+  auto cfg = minimallyValidStubConfig();
+  cfg.useGpu = true;
+  cfg.nGpuLayers = 0;
   bool threw = false;
   try {
     SupertonicModel m(cfg);
   } catch (const StatusError& e) {
     threw = true;
     const std::string what = e.what();
-    EXPECT_NE(what.find("GPU"), std::string::npos)
-        << "error should mention GPU; got: " << what;
-    EXPECT_NE(what.find("Supertonic"), std::string::npos)
-        << "error should mention Supertonic engine; got: " << what;
+    EXPECT_NE(what.find("conflicts with nGpuLayers"), std::string::npos)
+        << "error should explain the conflict; got: " << what;
   }
   EXPECT_TRUE(threw);
 }
 
-TEST(SupertonicValidate, NGpuLayersGreaterThanZeroRejected) {
-  auto cfg = minimallyValidStubConfig();
-  cfg.nGpuLayers = 99;
-  EXPECT_THROW(SupertonicModel{cfg}, StatusError);
-}
-
 TEST(SupertonicValidate, NGpuLayersZeroAcceptedAndDeferredLoad) {
   auto cfg = minimallyValidStubConfig();
   cfg.nGpuLayers = 0;
-  // Validation passes (CPU-only path); the stub file then fails GGUF
-  // parsing on load() (not at construction — load is now deferred to
-  // waitForLoadInitialization).  The eventual throw must NOT be the
-  // GPU-rejection branch.
+  // Validation passes (CPU path); the stub file then fails GGUF
+  // parsing on load() (not at construction — load is deferred to
+  // waitForLoadInitialization). Locks the contract that construction
+  // succeeds for any internally-consistent CPU config.
   std::unique_ptr<SupertonicModel> m;
   EXPECT_NO_THROW(m = std::make_unique<SupertonicModel>(cfg));
   ASSERT_NE(m, nullptr);
   EXPECT_FALSE(m->isLoaded());
-  bool threw = false;
-  try {
-    m->load();
-  } catch (const StatusError& e) {
-    threw = true;
-    const std::string what = e.what();
-    EXPECT_EQ(what.find("GPU"), std::string::npos)
-        << "nGpuLayers=0 should not trigger the GPU-rejection path; got: " << what;
-  }
-  EXPECT_TRUE(threw);
+  EXPECT_THROW(m->load(), StatusError);
   EXPECT_FALSE(m->isLoaded());
 }
 

@@ -31,8 +31,8 @@
  * `bash scripts/convert-models.sh -t supertonic-mtl`).  The
  * English-pinned single-sentence entry point lives in supertonic-tts.js.
  *
- * NOTE: Supertonic is CPU-only in tts-cpp today.  This example sets
- * useGPU=false explicitly to match.
+ * NOTE: Supertonic gained GPU support in tts-cpp@2026-06-05.  This
+ * example keeps useGPU=false so it runs identically everywhere.
  */
 
 const fs = require('bare-fs')

@@ -37,8 +37,9 @@
  * supertonic-mtl-sweep-tts.js; for the simpler English-pinned entry
  * point see supertonic-tts.js.
  *
- * NOTE: Supertonic is CPU-only in tts-cpp today.  This example sets
- * useGPU=false explicitly to match.
+ * NOTE: Supertonic gained GPU support in tts-cpp@2026-06-05.  This
+ * example keeps useGPU=false so it runs identically everywhere; flip
+ * to true on GPU-capable hosts to engage Metal / Vulkan / Adreno-OpenCL.
  */
 
 const fs = require('bare-fs')

@@ -20,9 +20,9 @@
  * Expects the Supertonic GGUF at:
  *   models/supertonic.gguf
  *
- * NOTE: Supertonic is CPU-only in tts-cpp today; this example sets
- * useGPU=false explicitly.  See supertonic-tts.js for the full
- * limitation context.
+ * NOTE: Supertonic gained GPU support in tts-cpp@2026-06-05; this
+ * example keeps useGPU=false so it runs identically everywhere.  See
+ * supertonic-tts.js for the GPU opt-in pattern.
  */
 
 const fs = require('bare-fs')

@@ -29,11 +29,11 @@
  * ONNX bundle into a single .gguf via
  * scripts/convert-supertonic2-to-gguf.py --arch supertonic.
  *
- * NOTE: Supertonic is CPU-only in tts-cpp today (engine docstring at
- * include/tts-cpp/supertonic/engine.h: "CPU only today").  Passing
- * useGPU=true throws at construction with a message pointing at the
- * limitation; the example explicitly sets useGPU=false.  Chatterbox
- * (turbo + MTL) keeps GPU enabled by default.
+ * NOTE: Supertonic gained GPU support in tts-cpp@2026-06-05 (QVAC-18605
+ * Vulkan/Metal optimisations + QVAC-19254 Adreno OpenCL sched). Pass
+ * useGPU=true on GPU-capable hosts to engage Metal / Vulkan / CUDA /
+ * Adreno-OpenCL via the tts-cpp init_gpu_backend tier policy; this
+ * example keeps useGPU=false so it runs identically everywhere.
  */
 
 const fs = require('bare-fs')

@@ -49,7 +49,7 @@ declare interface TTSGgmlFiles {
 declare interface TTSGgmlRuntimeConfig {
   /** Language code; default "en". Chatterbox MTL accepts es/fr/de/pt/it/zh/ja/ko/... */
   language?: string
-  /** Route inference through a GPU backend (Metal / Vulkan / CUDA / OpenCL) if available.  Defaults to `false` for both engines (opt-in via `useGPU: true` on GPU-capable hosts).  Supertonic still rejects `useGPU: true` at construction time (engine is CPU-only today). */
+  /** Route inference through a GPU backend (Metal / Vulkan / CUDA / OpenCL) if available, on either Chatterbox or Supertonic.  Defaults to `false` for both engines (opt-in via `useGPU: true` on GPU-capable hosts). */
   useGPU?: boolean
   /** Resample the engine's native rate (24 kHz Chatterbox, 44.1 kHz Supertonic) to this rate before emitting (8000-192000 Hz). */
   outputSampleRate?: number
@@ -68,7 +68,7 @@ declare interface TTSGgmlOptions {
   voiceDir?: string
   /** RNG seed for CFM initial noise + SineGen excitation (Chatterbox) / vector-estimator latent (Supertonic). */
   seed?: number
-  /** Move N layers to the GPU backend.  Chatterbox: pass 99 to move everything.  Supertonic: must be 0 / unset (engine is CPU-only today). */
+  /** Move N layers to the GPU backend.  Chatterbox + Supertonic: pass 99 to move everything. */
   nGpuLayers?: number
   /** Override `std::thread::hardware_concurrency()`. */
   threads?: number

@@ -362,22 +362,11 @@ class TTSGgml {
           'agnostic runStream() / runStreaming() / run({ streamOutput: true }) APIs.'
         )
       }
-      const wantsGpu =
-        this._config.useGPU === true ||
-        (this._nGpuLayers != null && this._nGpuLayers !== 0)
-      if (wantsGpu) {
-        throw new Error(
-          'tts-ggml: GPU execution is not supported by the Supertonic engine yet ' +
-          '(see tts-cpp include/tts-cpp/supertonic/engine.h: "CPU only today"). ' +
-          'GPU output is currently silently wrong (~4x quieter, slightly truncated) ' +
-          'because the Vulkan path of the supertonic vector-estimator + vocoder is ' +
-          'not yet validated.  Pass config: { useGPU: false } (and leave nGpuLayers ' +
-          'unset, or set it to 0) when constructing a Supertonic model. ' +
-          'Chatterbox also defaults to CPU now; opt in with ' +
-          'config: { useGPU: true } on GPU-capable hosts.'
-        )
-      }
-      if (this._config.useGPU === undefined) {
+      // GPU is supported as of tts-cpp@2026-06-05 (QVAC-18605 Supertonic
+      // Vulkan/Metal optimisations + QVAC-19254 sched/cpu_backend for Adreno
+      // OpenCL). Defaults to off only when neither useGPU nor nGpuLayers is set
+      // (mirrors Chatterbox); opt in with { useGPU: true } on GPU-capable hosts.
+      if (this._config.useGPU === undefined && this._nGpuLayers == null) {
         this._config.useGPU = false
       }
     } else if (this._config.useGPU === undefined && this._nGpuLayers == null) {

@@ -1,6 +1,6 @@
 {
   "name": "@qvac/tts-ggml",
-  "version": "0.2.0",
+  "version": "0.2.1",
   "description": "Text to Speech (TTS) addon for qvac (ggml backend, wrapping the chatterbox + supertonic engines from tts-cpp)",
   "addon": true,
   "engines": {

@@ -164,25 +164,43 @@ test('Chatterbox GPU smoke - useGPU=true must engage the GPU backend on GPU-capa
   }
 })
 
-test('Supertonic GPU smoke - useGPU=true is rejected at constructor (engine is CPU-only today)', { timeout: 60000 }, async (t) => {
-  const TTSGgml = require('@qvac/tts-ggml')
-  let threw = false
+test('Supertonic GPU smoke - useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => {
+  // QVAC-19255: Supertonic gained Vulkan/Metal/Adreno-OpenCL support
+  // in tts-cpp@2026-06-05 (QVAC-18605 rounds 1-13 + QVAC-19254 sched).
+  // This test mirrors the Chatterbox GPU smoke above: useGPU=true on
+  // a GPU-capable platform must resolve to a real GPU backend, not
+  // silently fall back to CPU.
+  const baseDir = getBaseDir()
+  const modelsDir = path.join(baseDir, 'models')
+
+  const download = await ensureSupertonicModel({ targetDir: modelsDir })
+  if (!download || !download.success) {
+    t.fail('Supertonic GGUF not available - registry fetch failed. Run `npm run download-models:registry` or stage models locally.')
+    return
+  }
+
+  const supertonicPath = download.path ||
+    path.join(modelsDir, 'supertonic.gguf')
+
+  const model = await loadSupertonicTTS({
+    supertonicModelPath: supertonicPath,
+    language: 'en',
+    voice: 'F1',
+    useGPU: true
+  })
   try {
-    /* eslint no-new: 0 */
-    new TTSGgml({
-      engine: TTSGgml.ENGINE_SUPERTONIC,
-      files: { supertonicModel: '/dev/null' },
-      voice: 'F1',
-      config: { language: 'en', useGPU: true }
-    })
-  } catch (e) {
-    threw = true
-    t.ok(/CPU only today/.test(e.message),
-      'rejection message references the engine docstring')
-    t.ok(/Pass config:.*useGPU: false/.test(e.message),
-      'rejection message tells user how to fix')
+    const result = await runSupertonicTTS(
+      model,
+      { text: 'GPU smoke check.' },
+      { minSamples: 5000 }
+    )
+    console.log(result.output)
+    t.ok(result.passed, 'Supertonic/GPU produced expected sample count')
+    t.ok(result.data.sampleCount > 0, 'Supertonic/GPU produced audio')
+    assertGpuBackend(t, 'Supertonic', result.data.stats)
+  } finally {
+    try { await model.unload() } catch (_e) {}
   }
-  t.ok(threw, 'TTSGgml constructor should throw on Supertonic + useGPU:true')
 })
 
 // CPU smoke: useGPU:false must actually pin the engine to CPU on every

@@ -119,5 +119,5 @@ test('Supertonic MTL: voice + language together survive ttsParams round-trip', (
   t.is(params.steps, 6)
   t.is(params.speed, 1.1)
   t.is(params.seed, 13)
-  t.is(params.useGPU, false, 'supertonic stays CPU-only on the JS side')
+  t.is(params.useGPU, false, 'useGPU defaults to false when unspecified')
 })
@@ -74,7 +74,7 @@ test('Supertonic: ttsParams shape passes voice/steps/speed/seed/threads/useGPU',
   t.is(params.speed, 1.25)
   t.is(params.seed, 7)
   t.is(params.threads, 2)
-  t.is(params.nGpuLayers, 0, 'nGpuLayers=0 is the only allowed GPU value for supertonic today')
+  t.is(params.nGpuLayers, 0, 'nGpuLayers is passed through verbatim to ttsParams')
   t.is(params.useGPU, false, 'useGPU follows config.useGPU')
   t.absent(params.t3ModelPath, 'no t3 path leaked into supertonic params')
   t.absent(params.s3genModelPath, 'no s3gen path leaked into supertonic params')