diff --git a/packages/bci-whispercpp/.gitignore b/packages/bci-whispercpp/.gitignore new file mode 100644 index 0000000000..b79e05d8a5 --- /dev/null +++ b/packages/bci-whispercpp/.gitignore @@ -0,0 +1,27 @@ +.vs/ +build/ +models/ +node_modules/ +.idea/ +prebuilds/ +vcpkg/cache/ +vcpkg/ports/ +!vcpkg/triplets/ +!vcpkg/toolchains/ + +test/fixtures/*.bin +test/unit/all.js +test/integration/all.js + +package-lock.json +.npmrc + +__pycache__/ +.pytest_cache/ +.vscode + +cpp-test-results.xml + +.clang-format +.clang-tidy +.valgrind.supp diff --git a/packages/bci-whispercpp/CHANGELOG.md b/packages/bci-whispercpp/CHANGELOG.md new file mode 100644 index 0000000000..d51c53ae3b --- /dev/null +++ b/packages/bci-whispercpp/CHANGELOG.md @@ -0,0 +1,40 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.0] + +Initial POC release of `@qvac/bci-whispercpp`, a brain-computer-interface neural +signal transcription addon powered by a BCI-patched fork of whisper.cpp. + +### Added + +- `BCIWhispercpp` client class (standalone, built on `createJobHandler` + + `exclusiveRunQueue` from `@qvac/infer-base`) with `load()`, `transcribe()`, + `transcribeFile()`, `unload()`, `destroy()`, `cancel()`, `getState()`. +- Low-level `BCIInterface` (`./bci` subpath export) for users that need direct + control over the native addon lifecycle. +- `./addonLogging` subpath exposing `setLogger` / `releaseLogger` for wiring a + native log handler. +- C++ native addon (`NeuralProcessor`, `BCIModel`, `BCIConfig`) using the + `qvac-lib-inference-addon-cpp` framework, with BCI-specific preprocessing + (Gaussian smoothing, low-rank day projection, softsign non-linearity) and + mel-layout injection into a patched whisper.cpp encoder. 
+- Integration tests for load/destroy, batch transcription, and a 5-sample + WER measurement (avg 6.0% on the reference fixtures). +- GoogleTest C++ unit tests covering mel shape, gaussian smoothing, padded + frames, truncation handling, invalid-config rejection, and range validation. +- `scripts/convert-model.py` to convert a BrainWhisperer checkpoint into the + GGML model + embedder binary pair consumed at runtime. +- `scripts/download-models.sh` to fetch the reference model and test fixtures + from the `bci-test-assets-v0.1.0` GitHub release. + +### Known Limitations + +- Streaming transcription is not implemented in this release; see follow-up + work tracked under QVAC-17062. +- Inference error codes live in the `26001-27000` range in the current + implementation. diff --git a/packages/bci-whispercpp/CMakeLists.txt b/packages/bci-whispercpp/CMakeLists.txt new file mode 100644 index 0000000000..3b9541eed5 --- /dev/null +++ b/packages/bci-whispercpp/CMakeLists.txt @@ -0,0 +1,136 @@ +cmake_minimum_required(VERSION 3.25) + +option(BUILD_TESTING "Build tests" OFF) + +if(BUILD_TESTING) + list(APPEND VCPKG_MANIFEST_FEATURES "tests") +endif() + +find_package(cmake-bare REQUIRED PATHS node_modules/cmake-bare) +find_package(cmake-vcpkg REQUIRED PATHS node_modules/cmake-vcpkg) + +# Prepend the local overlay triplets on every platform and preserve any +# externally-set value (matches the other qvac addons). Only the Linux +# triplets actually differ from vcpkg's defaults today, but exposing the +# directory uniformly avoids platform-conditional surprises. 
+set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/triplets;${VCPKG_OVERLAY_TRIPLETS}") + +project(bci-whispercpp CXX C) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_compile_options(-stdlib=libc++) + add_link_options(-stdlib=libc++ -static-libstdc++) +endif() + +find_path(VCPKG_INSTALLED_PATH share/qvac-lint-cpp/.clang-format REQUIRED) +configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-format + ${CMAKE_CURRENT_SOURCE_DIR}/.clang-format COPYONLY) +configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.clang-tidy + ${CMAKE_CURRENT_SOURCE_DIR}/.clang-tidy COPYONLY) +configure_file(${VCPKG_INSTALLED_PATH}/share/qvac-lint-cpp/.valgrind.supp + ${CMAKE_CURRENT_SOURCE_DIR}/.valgrind.supp COPYONLY) + +find_path(QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS "qvac-lib-inference-addon-cpp/ModelInterfaces.hpp" REQUIRED) # fail at configure time instead of passing a -NOTFOUND include path to the targets +find_package(whisper CONFIG REQUIRED) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) # do not silently decay to an older standard +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# Per-configuration definition: CMAKE_BUILD_TYPE is empty under multi-config generators, so use a generator expression. +add_compile_definitions($<$<CONFIG:Debug>:_DEBUG>) + +if(WIN32) + add_compile_definitions(NOMINMAX WIN32_LEAN_AND_MEAN NOGDI) +endif() + +add_bare_module(bci-whispercpp EXPORTS) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(${bci-whispercpp}_module PRIVATE -Wl,--exclude-libs,ALL) +endif() + +target_sources( + ${bci-whispercpp} + PRIVATE + ${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp + ${PROJECT_SOURCE_DIR}/addon/src/js-interface/JSAdapter.cpp + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIConfig.cpp + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIModel.cpp + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/NeuralProcessor.cpp +) + +target_include_directories( + ${bci-whispercpp} + PRIVATE + ${PROJECT_SOURCE_DIR}/addon + ${PROJECT_SOURCE_DIR}/addon/src + ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include + ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS} +) + +target_link_libraries( + ${bci-whispercpp} + PRIVATE + 
whisper::whisper +) + +target_compile_definitions(${bci-whispercpp} PUBLIC JS_LOGGER) + +if(WIN32) + target_link_libraries( + ${bci-whispercpp} + PRIVATE + msvcrt.lib + ) +endif() + +if(BUILD_TESTING) + find_package(GTest REQUIRED) + + set(CORE_SRCS + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIConfig.cpp + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIModel.cpp + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/NeuralProcessor.cpp + ) + + add_library(bci-core STATIC ${CORE_SRCS}) + + target_link_libraries(bci-core PRIVATE + whisper::whisper + ) + + target_include_directories(bci-core PRIVATE + ${PROJECT_SOURCE_DIR}/addon/ + ${PROJECT_SOURCE_DIR}/addon/src/ + ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include + ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS} + ) + + add_executable( + test-bci-core + ${PROJECT_SOURCE_DIR}/addon/tests/test_core.cpp + ) + + target_include_directories(test-bci-core PRIVATE + ${PROJECT_SOURCE_DIR}/addon/ + ${PROJECT_SOURCE_DIR}/addon/src/ + ${PROJECT_SOURCE_DIR}/addon/src/model-interface + ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/ + ${PROJECT_SOURCE_DIR}/addon/tests/ + ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include + ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS} + ) + + target_link_libraries(test-bci-core PRIVATE + bci-core + whisper::whisper + GTest::gtest_main + GTest::gmock + ) + + set_target_properties(test-bci-core PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/addon/tests + ) +endif() diff --git a/packages/bci-whispercpp/LICENSE b/packages/bci-whispercpp/LICENSE new file mode 100644 index 0000000000..7d199ae333 --- /dev/null +++ b/packages/bci-whispercpp/LICENSE @@ -0,0 +1,179 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + +Copyright 2026 Tether Data, S.A. de C.V. diff --git a/packages/bci-whispercpp/NOTICE b/packages/bci-whispercpp/NOTICE new file mode 100644 index 0000000000..d2d9086b7b --- /dev/null +++ b/packages/bci-whispercpp/NOTICE @@ -0,0 +1,108 @@ +@qvac/bci-whispercpp +Copyright 2026 Tether Data, S.A. de C.V. + +This product includes third-party components under their +respective licenses. @qvac/bci-whispercpp itself is licensed under +Apache-2.0; bundled dependencies are governed by the licenses +listed below. + +========================================================================= +JavaScript Dependencies +========================================================================= + +--- apache-2.0 (Apache License 2.0) --- + + @qvac/error@0.1.1 + @qvac/logging@0.1.0 + b4a@1.8.0 + https://github.com/holepunchto/b4a + bare-buffer@3.6.0 + https://github.com/holepunchto/bare-buffer + bare-events@2.8.2 + https://github.com/holepunchto/bare-events + bare-os@3.8.7 + https://github.com/holepunchto/bare-os + bare-path@3.0.0 + https://github.com/holepunchto/bare-path + bare-stream@2.12.0 + https://github.com/holepunchto/bare-stream + events-universal@1.0.1 + https://github.com/holepunchto/events-universal + text-decoder@1.2.7 + https://github.com/holepunchto/text-decoder + +--- mit (MIT License) --- + + fast-fifo@1.3.2 + https://github.com/mafintosh/fast-fifo + streamx@2.25.0 + https://github.com/mafintosh/streamx + teex@1.0.1 + https://github.com/mafintosh/teex + + +========================================================================= +Python Dependencies (model conversion tooling only) +========================================================================= + +The scripts/convert-model.py tool used to convert BrainWhisperer +PyTorch checkpoints to the GGML + embedder binary format requires: + +--- bsd-3-clause (BSD 3-Clause License) --- + + numpy + https://numpy.org + torch + https://pytorch.org + +--- apache-2.0 (Apache License 2.0) --- 
+ + transformers + https://github.com/huggingface/transformers + (used by convert-model.py for WhisperTokenizer) + + +========================================================================= +C++ Dependencies +========================================================================= + +--- apache-2.0 (Apache License 2.0) --- + + qvac-lib-inference-addon-cpp + https://github.com/tetherto/qvac + Copyright (c) 2024-2026 Tether Data, S.A. de C.V. + +--- mit (MIT License) --- + + whisper-cpp + https://github.com/tetherto/qvac-ext-lib-whisper.cpp + (BCI-patched fork of https://github.com/ggml-org/whisper.cpp) + + ggml + https://github.com/ggml-org/ggml + (bundled with whisper.cpp) + +--- bsd-3-clause (BSD 3-Clause License) --- + + gtest + https://github.com/google/googletest + (test-only dependency) + +--- apache-2.0 with llvm-exception (Apache License 2.0 with LLVM Exception) --- + + libc++ (LLVM C++ Standard Library) + https://github.com/llvm/llvm-project + (runtime dependency on Linux targets via -stdlib=libc++) + + +========================================================================= +Model Attribution +========================================================================= + +Neural-signal-to-text transcription uses a derived BCI-trained whisper +model. The bci-embedder.bin weight file contains day-specific projection +matrices derived from the BrainWhisperer research project +(https://github.com/cffan/neural_seq_decoder). End users must obtain +the upstream research checkpoint and convert it locally using +scripts/convert-model.py; no model weights are distributed with this +package. diff --git a/packages/bci-whispercpp/PULL_REQUEST_TEMPLATE.md b/packages/bci-whispercpp/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000..0240796181 --- /dev/null +++ b/packages/bci-whispercpp/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,42 @@ +**Note**: be concise and prefer bullet points. + +## 🎯 What problem does this PR solve? 
+ +- +- + +## 📝 How does it solve it? + +- +- + +## 🧪 How was it tested? + +**Delete this section if not applicable.** + +- +- + +## 💥 Breaking Changes + +**Delete this section if not applicable.** + +**BEFORE:** + +```javascript +// old code example +``` + +**AFTER:** + +```javascript +// new code example +``` + +## 🔌 API Changes + +**Delete this section if not applicable.** + +```javascript +// new API usage example +``` diff --git a/packages/bci-whispercpp/README.md b/packages/bci-whispercpp/README.md new file mode 100644 index 0000000000..9474d9582d --- /dev/null +++ b/packages/bci-whispercpp/README.md @@ -0,0 +1,201 @@ +# @qvac/bci-whispercpp + +Brain-Computer Interface (BCI) neural signal transcription addon for qvac, powered by the [tetherto/qvac-ext-lib-whisper.cpp](https://github.com/tetherto/qvac-ext-lib-whisper.cpp) fork of whisper.cpp. + +Transcribes multi-channel neural signals (e.g., 512-channel microelectrode array recordings) into text using a BCI-trained whisper model running natively via GGML. Output matches the Python BrainWhisperer reference model exactly. 
+ +## Architecture + +``` +Neural Signal (512ch, 20ms bins) + │ + ▼ +┌──────────────────────────────┐ +│ NeuralProcessor (C++) │ +│ - Gaussian smoothing │ std=2, kernel=100 +│ - Day-specific projection │ low-rank (A·B) + month + softsign +│ - Pad to 3000 frames │ mel-major layout for whisper.cpp +└──────────────┬───────────────┘ + │ mel features (512 × 3000) + ▼ +┌──────────────────────────────┐ +│ whisper.cpp (patched) │ +│ - conv1 (k=7, 512→384) │ BCI-trained embedder weights +│ - conv2 (k=3, stride=2) │ +│ - Positional encoding │ learned time PE + sinusoidal day PE +│ - 6-layer encoder │ windowed attention (w=57) on layers 0–3 +│ - 4-layer decoder (LoRA) │ beam search, length_penalty=0.14 +└──────────────┬───────────────┘ + │ + ▼ + Text output +``` + +## Results + +Native GGML inference matches the Python BrainWhisperer reference on all test samples: + +| Sample | Ground Truth | GGML Native Output | WER | +|--------|-------------|-------------------|-----| +| 0 | "You can see the code at this point as well." | "You can see the good at this point as well." | 10.0% | +| 1 | "How does it keep the cost down?" | "How does it keep the cost down?" | 0.0% | +| 2 | "Not too controversial." | "Not too controversial." | 0.0% | +| 3 | "The jury and a judge work together on it." | "The jury and a judge work together on it." | 0.0% | +| 4 | "Were quite vocal about it." | "We're quite vocal about it." 
| 20.0% | +| **Average** | | | **6.0%** | + +## Neural Signal Format + +Binary files with the following layout: + +| Offset | Type | Description | +|--------|-----------|------------------------------------------------------| +| 0 | uint32 | Number of timesteps | +| 4 | uint32 | Number of channels | +| 8 | float32[] | Feature data (row-major: `features[t * channels + c]`) | + +Each timestep represents a 20ms bin of neural activity. Channels correspond to individual electrodes in a microelectrode array (typically 512 channels). + +## Installation + +```bash +cd packages/bci-whispercpp +npm install +VCPKG_ROOT=/path/to/vcpkg npm run build +``` + +### Prerequisites + +- **Bare runtime** >= 1.24.0 +- **CMake** >= 3.25 +- **vcpkg** with `VCPKG_ROOT` environment variable set + +### Model Conversion Prerequisites + +- **Python 3** with `numpy`, `torch`, and `transformers` (`pip install numpy torch transformers`) + +### Model Conversion + +Convert a trained BrainWhisperer checkpoint. This produces **two files**, both required for inference: + +| File | Size | Description | +|------|------|-------------| +| `ggml-bci-windowed.bin` | ~84 MB | GGML model: whisper encoder/decoder (LoRA-merged), tokenizer, positional embedding, windowed attention header | +| `bci-embedder.bin` | ~24 MB | Day projection weights: low-rank A·B matrices per recording day, month projections, session-to-day mapping | + +```bash +python3 scripts/convert-model.py \ + --checkpoint /path/to/epoch=93-val_wer=0.0910.ckpt +``` + +Both files are written to `models/` by default. 
All flags are optional: + +| Flag | Default | Description | +|------|---------|-------------| +| `--output` | `models/ggml-bci-windowed.bin` | GGML model output path | +| `--embedder-output` | `models/bci-embedder.bin` | Embedder weights output path | +| `--day-idx` | `1` | Day index for baked positional embedding | +| `--window-size` | `57` | Windowed attention size (0 to disable) | +| `--last-window-layer` | `3` | Last encoder layer with windowed attention | +| `--f32` | off | Use f32 for all tensors (avoids f16 precision loss, ~2x larger) | + +**Important:** Both files must be in the same directory at runtime. The C++ addon looks for `bci-embedder.bin` next to the GGML model file and will fail if it is missing. + +## Usage + +### Low-level API (BCIInterface) + +```javascript +const { BCIInterface } = require('@qvac/bci-whispercpp/bci') +const binding = require('@qvac/bci-whispercpp/binding') + +const config = { + contextParams: { model: '/path/to/ggml-bci.bin' }, + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false }, + bciConfig: { day_idx: 1 } +} + +const onOutput = (addon, event, jobId, data, error) => { + if (event === 'Output') console.log('Segment:', data[0]?.text) + if (event === 'JobEnded') console.log('Done:', data) + if (event === 'Error') console.error('Error:', error) +} + +const model = new BCIInterface(binding, config, onOutput) +await model.activate() + +// Batch mode — pass entire signal at once +const neuralData = fs.readFileSync('signal.bin') +await model.runJob({ input: new Uint8Array(neuralData) }) + +// Streaming mode — send chunks then signal end (not implemented in 0.1.0; see CHANGELOG "Known Limitations") +await model.append({ type: 'neural', input: chunk1 }) +await model.append({ type: 'neural', input: chunk2 }) +await model.append({ type: 'end of job' }) + +await model.destroyInstance() +``` + +## Testing + +### Integration Tests + +```bash +WHISPER_MODEL_PATH=./models/ggml-bci-windowed.bin npm run test:integration +``` + +### C++ Unit Tests + +```bash 
+VCPKG_ROOT=/path/to/vcpkg npm run test:cpp +``` + +## Configuration + +### whisperConfig + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `language` | string | `"en"` | Language code | +| `temperature` | number | `0.0` | Sampling temperature | +| `n_threads` | number | `0` (auto) | Number of threads | + +### bciConfig + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `day_idx` | number | `0` | Session day index for day-specific projection | + +### contextParams + +| Parameter | Type | Description | +|-----------|------|-------------| +| `model` | string | **Required.** Path to BCI GGML model file | +| `use_gpu` | boolean | Enable GPU acceleration | +| `flash_attn` | boolean | Enable flash attention | + +## whisper.cpp Patches + +The BCI patches live in the `tetherto/qvac-ext-lib-whisper.cpp` fork (v1.8.4.2) and are consumed via the `qvac-registry-vcpkg` port: + +| Feature | Description | +|---------|-------------| +| Variable conv1 kernel | Read `n_audio_conv1_kernel` from model header (k=7 for 512ch BCI vs k=3 for audio) | +| Windowed attention | Attention mask with configurable window size/layer params in header | +| BCI SOS tokens | BCI-specific start-of-sequence token handling | +| Graph placement fix | Correct encoder-graph mask population for the encoder graph refactor | + +## Platform Support + +| Platform | Architecture | Status | +|----------|-------------|--------| +| macOS | arm64 (Apple Silicon) | Tested | +| Linux | x64 | Feasible (same build system as transcription-whispercpp) | +| Windows | x64 | Feasible (whisper.cpp supports MSVC) | +| Android | arm64 | Feasible (NDK toolchain) | +| iOS | arm64 | Feasible (Xcode toolchain) | + +## License + +Apache-2.0 diff --git a/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp b/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp new file mode 100644 index 0000000000..d849efb23e --- /dev/null +++ 
b/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp @@ -0,0 +1,170 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "model-interface/BCITypes.hpp" +#include "model-interface/bci/BCIModel.hpp" +#include "src/js-interface/JSAdapter.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace js = qvac_lib_inference_addon_cpp::js; +using qvac_lib_inference_addon_cpp::OutputQueue; + +inline BCIConfig +createBCIConfig(js_env_t* env, const js::Object& configurationParams) { + JSAdapter adapter; + return adapter.loadFromJSObject(configurationParams, env); +} + +struct JsTranscriptOutputHandler + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler { + JsTranscriptOutputHandler() + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + Transcript>([this](const Transcript& output) -> js_value_t* { + auto jsTranscript = js::Object::create(this->env_); + jsTranscript.setProperty( + this->env_, "text", js::String::create(this->env_, output.text)); + jsTranscript.setProperty( + this->env_, "toAppend", + js::Boolean::create(this->env_, output.toAppend)); + jsTranscript.setProperty( + this->env_, "start", + js::Number::create(this->env_, output.start)); + jsTranscript.setProperty( + this->env_, "end", + js::Number::create(this->env_, output.end)); + jsTranscript.setProperty( + this->env_, "id", + js::Number::create(this->env_, static_cast(output.id))); + return jsTranscript; + }) {} +}; + +struct JsTranscriptArrayOutputHandler + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + std::vector> { + JsTranscriptArrayOutputHandler() + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + std::vector>( + [this](const std::vector& output) -> js_value_t* { + auto jsOutput = js::Array::create(this->env_); + for (size_t i = 0; i < output.size(); ++i) { + auto jsTranscript = js::Object::create(this->env_); + 
jsTranscript.setProperty( + this->env_, "text", + js::String::create(this->env_, output[i].text)); + jsTranscript.setProperty( + this->env_, "toAppend", + js::Boolean::create(this->env_, output[i].toAppend)); + jsTranscript.setProperty( + this->env_, "start", + js::Number::create(this->env_, output[i].start)); + jsTranscript.setProperty( + this->env_, "end", + js::Number::create(this->env_, output[i].end)); + jsTranscript.setProperty( + this->env_, "id", + js::Number::create( + this->env_, static_cast(output[i].id))); + jsOutput.set(this->env_, i, jsTranscript); + } + return jsOutput; + }) {} +}; + +inline js_value_t* createInstance(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + static std::once_flag whisperLogOnce; + std::call_once(whisperLogOnce, []() { + whisper_log_set( + [](enum ggml_log_level level, const char* text, void*) { + if (text == nullptr) return; + auto prio = (level == GGML_LOG_LEVEL_ERROR) + ? qvac_lib_inference_addon_cpp::logger::Priority::ERROR + : (level == GGML_LOG_LEVEL_WARN) + ? 
qvac_lib_inference_addon_cpp::logger::Priority::WARNING + : qvac_lib_inference_addon_cpp::logger::Priority::DEBUG; + QLOG(prio, std::string("[whisper.cpp] ") + text); + }, + nullptr); + }); + JsArgsParser args(env, info); + auto configurationParams = args.getJsObject(1, "configurationParams"); + + unique_ptr model = + make_unique(createBCIConfig(env, configurationParams)); + + out_handl::OutputHandlers outputHandlers; + outputHandlers.add(make_shared()); + outputHandlers.add(make_shared()); + unique_ptr callback = make_unique( + env, + args.get(0, "jsHandle"), + args.getFunction(2, "outputCallback"), + std::move(outputHandlers)); + + auto addon = make_unique(env, std::move(callback), std::move(model)); + return JsInterface::createInstance(env, std::move(addon)); +} +JSCATCH + +inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + JsArgsParser args(env, info); + AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance")); + auto [type, jsInput] = JsInterface::getInput(args); + + if (type != "neural") { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Unknown input type: " + type + " (expected 'neural')"); + } + + vector neuralBytes = + js::TypedArray(env, jsInput).as>(env); + return instance.runJob(std::any(std::move(neuralBytes))); +} +JSCATCH + +inline js_value_t* reload(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + JsArgsParser args(env, info); + AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance")); + auto configurationParams = args.getJsObject(1, "configurationParams"); + BCIConfig config = createBCIConfig(env, configurationParams); + + return js::JsAsyncTask::run( + env, + [addonCpp = instance.addonCpp, config = std::move(config)]() mutable { + auto* bciModel = + dynamic_cast(&addonCpp->model.get()); + if (bciModel == 
nullptr) { + throw std::runtime_error("Invalid model type for reload"); + } + bciModel->setConfig(config); + }); +} +JSCATCH + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp b/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp new file mode 100644 index 0000000000..15b546b4d6 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include +#include + +#include "qvac-lib-inference-addon-cpp/Errors.hpp" + +namespace qvac_lib_inference_addon_bci::errors { +constexpr const char* ADDON_ID = "BCI"; +} // namespace qvac_lib_inference_addon_bci::errors + +namespace qvac_errors { +namespace bci_error { +enum class Code : std::uint8_t { + InvalidNeuralSignal, + FailedToLoadModel, + EmbedderWeightsNotFound, +}; + +inline const char* codeName(Code code) { + switch (code) { + case Code::InvalidNeuralSignal: + return "InvalidNeuralSignal"; + case Code::FailedToLoadModel: + return "FailedToLoadModel"; + case Code::EmbedderWeightsNotFound: + return "EmbedderWeightsNotFound"; + } + return "BCIError"; +} + +inline qvac_errors::StatusError +makeStatus(Code code, const std::string& message) { + return qvac_errors::StatusError( + qvac_lib_inference_addon_bci::errors::ADDON_ID, + codeName(code), + message); +} +} // namespace bci_error +} // namespace qvac_errors diff --git a/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp new file mode 100644 index 0000000000..c9a10a9521 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp @@ -0,0 +1,126 @@ +#include "JSAdapter.hpp" + +#include +#include +#include + +#include + +using namespace qvac_lib_inference_addon_cpp::js; + +namespace qvac_lib_inference_addon_bci { + +namespace { + +auto getPropertyNames(js_env_t* env, Object object) -> Array { + js_value_t* propertyNames; + JS(js_get_property_names(env, object, 
&propertyNames)); + return Array::fromValue(propertyNames); +} + +auto getValueType(js_env_t* env, js_value_t* value) -> js_value_type_t { + js_value_type_t valueType; + JS(js_typeof(env, value, &valueType)); + return valueType; +} + +template +void addConfigParam( + std::map& cfg, std::string&& key, T&& value) { + if (auto e = cfg.try_emplace(std::move(key), std::forward(value)); + !e.second) { + std::ostringstream oss; + oss << "key '" << key << "' already exists"; + throw std::runtime_error{oss.str()}; + } +} + +} // namespace + +void JSAdapter::loadMap( + Object jsObject, js_env_t* env, + std::map& output) { + + auto names = getPropertyNames(env, jsObject); + auto namesSize = names.size(env); + for (auto i = 0; i < namesSize; ++i) { + auto key = names.get(env, i); + auto value = jsObject.getProperty(env, key); + switch (getValueType(env, value)) { + case js_boolean: + addConfigParam( + output, + key.as(env), + Boolean::fromValue(value).as(env)); + break; + case js_number: + addConfigParam( + output, + key.as(env), + Number::fromValue(value).as(env)); + break; + case js_string: + addConfigParam( + output, + key.as(env), + String::fromValue(value).as(env)); + break; + case js_object: + continue; + case js_function: + continue; + default: + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Invalid type for key: " + key.as(env) + + " is not supported"); + } + } +} + +BCIConfig JSAdapter::loadFromJSObject(Object jsObject, js_env_t* env) { + BCIConfig config; + + auto whisperConfigObj = + jsObject.getOptionalProperty(env, "whisperConfig"); + if (whisperConfigObj.has_value()) { + loadMap(whisperConfigObj.value(), env, config.whisperMainCfg); + } + + auto contextParamsObj = + jsObject.getOptionalProperty(env, "contextParams"); + if (contextParamsObj.has_value()) { + loadContextParams(contextParamsObj.value(), env, config); + } + + auto miscConfigObj = + jsObject.getOptionalProperty(env, "miscConfig"); + if (miscConfigObj.has_value()) { + 
loadMiscParams(miscConfigObj.value(), env, config); + } + + auto bciConfigObj = + jsObject.getOptionalProperty(env, "bciConfig"); + if (bciConfigObj.has_value()) { + loadBCIParams(bciConfigObj.value(), env, config); + } + + return config; +} + +void JSAdapter::loadContextParams( + Object contextParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(contextParamsObj, env, config.whisperContextCfg); +} + +void JSAdapter::loadMiscParams( + Object miscParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(miscParamsObj, env, config.miscConfig); +} + +void JSAdapter::loadBCIParams( + Object bciParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(bciParamsObj, env, config.bciConfig); +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp new file mode 100644 index 0000000000..bb3c34e0c2 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include + +#include + +#include "addon/BCIErrors.hpp" +#include "model-interface/bci/BCIConfig.hpp" +#include "qvac-lib-inference-addon-cpp/Errors.hpp" + +namespace qvac_lib_inference_addon_cpp::js { +class Object; +} + +namespace qvac_lib_inference_addon_bci { + +class JSAdapter { +public: + JSAdapter() = default; + + auto loadFromJSObject( + qvac_lib_inference_addon_cpp::js::Object jsObject, js_env_t* env) + -> BCIConfig; + + void loadContextParams( + qvac_lib_inference_addon_cpp::js::Object contextParamsObj, js_env_t* env, + BCIConfig& config); + + void loadMiscParams( + qvac_lib_inference_addon_cpp::js::Object miscParamsObj, js_env_t* env, + BCIConfig& config); + + void loadBCIParams( + qvac_lib_inference_addon_cpp::js::Object bciParamsObj, js_env_t* env, + BCIConfig& config); + +private: + void loadMap( + qvac_lib_inference_addon_cpp::js::Object jsObject, js_env_t* env, + std::map& output); +}; + +} // namespace 
qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/js-interface/binding.cpp b/packages/bci-whispercpp/addon/src/js-interface/binding.cpp new file mode 100644 index 0000000000..3a9a90072c --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/binding.cpp @@ -0,0 +1,39 @@ +#include + +#include "src/addon/AddonJs.hpp" + +// NOLINTBEGIN(cppcoreguidelines-macro-usage,readability-function-cognitive-complexity,modernize-use-trailing-return-type,readability-identifier-naming) +auto qvac_lib_inference_addon_bci_exports( + js_env_t* env, + js_value_t* exports) + -> js_value_t* { // NOLINT(readability-identifier-naming) + +#define V(name, fn) \ + { \ + js_value_t* val; \ + if (js_create_function(env, name, -1, fn, nullptr, &val) != 0) { \ + return nullptr; \ + } \ + if (js_set_named_property(env, exports, name, val) != 0) { \ + return nullptr; \ + } \ + } + + V("createInstance", qvac_lib_inference_addon_bci::createInstance) + V("runJob", qvac_lib_inference_addon_bci::runJob) + V("reload", qvac_lib_inference_addon_bci::reload) + V("loadWeights", qvac_lib_inference_addon_cpp::JsInterface::loadWeights) + V("activate", qvac_lib_inference_addon_cpp::JsInterface::activate) + V("cancel", qvac_lib_inference_addon_cpp::JsInterface::cancel) + V("destroyInstance", + qvac_lib_inference_addon_cpp::JsInterface::destroyInstance) + V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger) + V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger) +#undef V + + return exports; +} + +BARE_MODULE( + qvac_lib_inference_addon_bci, qvac_lib_inference_addon_bci_exports) +// NOLINTEND(cppcoreguidelines-macro-usage,readability-function-cognitive-complexity,modernize-use-trailing-return-type,readability-identifier-naming) diff --git a/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp b/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp new file mode 100644 index 0000000000..900ee86d97 --- /dev/null +++ 
b/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include + +namespace qvac_lib_inference_addon_bci { + +struct Transcript { + std::string text; + bool toAppend; + float start; + float end; + size_t id; + + Transcript() : toAppend{false}, start(-1.0F), end(-1.0F), id{0} {} + + explicit Transcript(std::string_view strView) + : text{strView}, toAppend{false}, start{-1.0F}, end{-1.0F}, id{0} {} +}; + +struct NeuralSignalHeader { + uint32_t numTimesteps; + uint32_t numChannels; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp new file mode 100644 index 0000000000..981c377cb5 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp @@ -0,0 +1,310 @@ +#include "BCIConfig.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qvac-lib-inference-addon-cpp/Errors.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace { + +// JS Number values arrive as double through the binding layer. Convert them +// safely to the target integer type, validating that the value is finite and +// within range. 
+int toInt(const JSValueVariant& v, const std::string& key) { + if (const auto* d = std::get_if(&v)) { + if (!std::isfinite(*d)) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a finite number"); + } + if (*d < static_cast(std::numeric_limits::min()) || + *d > static_cast(std::numeric_limits::max())) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " is out of int32 range"); + } + if (std::floor(*d) != *d) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be an integer"); + } + return static_cast(*d); + } + if (const auto* i = std::get_if(&v)) { + return *i; + } + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a number"); +} + +float toFloat(const JSValueVariant& v, const std::string& key) { + if (const auto* d = std::get_if(&v)) { + if (!std::isfinite(*d)) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a finite number"); + } + return static_cast(*d); + } + if (const auto* i = std::get_if(&v)) { + return static_cast(*i); + } + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a number"); +} + +bool toBool(const JSValueVariant& v, const std::string& key) { + if (const auto* b = std::get_if(&v)) { + return *b; + } + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a boolean"); +} + +const std::string& toString(const JSValueVariant& v, const std::string& key) { + if (const auto* s = std::get_if(&v)) { + return *s; + } + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + key + " must be a string"); +} + +int computeOptimalThreads() { + const unsigned hw = std::thread::hardware_concurrency(); + return hw > 0 ? 
static_cast(std::min(hw, 16U)) : 4; +} + +void ensureRange(const std::string& key, double value, double lo, double hi) { + if (value < lo || value > hi) { + std::ostringstream oss; + oss << key << " must be in [" << lo << ", " << hi << "], got " << value; + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, oss.str()); + } +} + +} // namespace + +std::string convertVariantToString(const JSValueVariant& value) { + return std::visit( + [](const auto& v) -> std::string { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return "null"; + } else if constexpr (std::is_same_v) { + return std::to_string(v); + } else if constexpr (std::is_same_v) { + std::ostringstream oss; + oss << v; + return oss.str(); + } else if constexpr (std::is_same_v) { + return v; + } else if constexpr (std::is_same_v) { + return v ? "true" : "false"; + } + return "unknown"; + }, + value); +} + +const HandlersMap& getWhisperMainHandlers() { + static const HandlersMap handlers = { + {"language", + [](whisper_full_params& /*p*/, const JSValueVariant& /*v*/) { + // Language is handled separately in toWhisperFullParams via + // BCIConfig::lang_ to avoid static-local lifetime issues. + }}, + {"n_threads", + [](whisper_full_params& p, const JSValueVariant& v) { + int n = toInt(v, "n_threads"); + if (n < 0) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "n_threads must be >= 0"); + } + p.n_threads = (n == 0) ? 
computeOptimalThreads() : n; + }}, + {"translate", + [](whisper_full_params& p, const JSValueVariant& v) { + p.translate = toBool(v, "translate"); + }}, + {"no_timestamps", + [](whisper_full_params& p, const JSValueVariant& v) { + p.no_timestamps = toBool(v, "no_timestamps"); + }}, + {"single_segment", + [](whisper_full_params& p, const JSValueVariant& v) { + p.single_segment = toBool(v, "single_segment"); + }}, + {"temperature", + [](whisper_full_params& p, const JSValueVariant& v) { + float t = toFloat(v, "temperature"); + ensureRange("temperature", t, 0.0, 2.0); + p.temperature = t; + }}, + {"suppress_nst", + [](whisper_full_params& p, const JSValueVariant& v) { + p.suppress_nst = toBool(v, "suppress_nst"); + }}, + {"suppress_blank", + [](whisper_full_params& p, const JSValueVariant& v) { + p.suppress_blank = toBool(v, "suppress_blank"); + }}, + {"duration_ms", + [](whisper_full_params& p, const JSValueVariant& v) { + int ms = toInt(v, "duration_ms"); + if (ms < 0) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "duration_ms must be >= 0"); + } + p.duration_ms = ms; + }}, + {"print_special", + [](whisper_full_params& p, const JSValueVariant& v) { + p.print_special = toBool(v, "print_special"); + }}, + {"print_progress", + [](whisper_full_params& p, const JSValueVariant& v) { + p.print_progress = toBool(v, "print_progress"); + }}, + {"print_realtime", + [](whisper_full_params& p, const JSValueVariant& v) { + p.print_realtime = toBool(v, "print_realtime"); + }}, + {"print_timestamps", + [](whisper_full_params& p, const JSValueVariant& v) { + p.print_timestamps = toBool(v, "print_timestamps"); + }}, + {"detect_language", + [](whisper_full_params& p, const JSValueVariant& v) { + p.detect_language = toBool(v, "detect_language"); + }}, + {"greedy_best_of", + [](whisper_full_params& p, const JSValueVariant& v) { + int b = toInt(v, "greedy_best_of"); + ensureRange("greedy_best_of", b, 1, 32); + p.greedy.best_of = b; + }}, + 
{"beam_search_beam_size", + [](whisper_full_params& p, const JSValueVariant& v) { + int b = toInt(v, "beam_search_beam_size"); + ensureRange("beam_search_beam_size", b, 1, 32); + p.beam_search.beam_size = b; + }}, + }; + return handlers; +} + +const HandlersMap& getWhisperContextHandlers() { + static const HandlersMap handlers = { + {"use_gpu", + [](whisper_context_params& p, const JSValueVariant& v) { + p.use_gpu = toBool(v, "use_gpu"); + }}, + {"flash_attn", + [](whisper_context_params& p, const JSValueVariant& v) { + p.flash_attn = toBool(v, "flash_attn"); + }}, + {"gpu_device", + [](whisper_context_params& p, const JSValueVariant& v) { + int d = toInt(v, "gpu_device"); + if (d < 0) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "gpu_device must be >= 0"); + } + p.gpu_device = d; + }}, + {"model", + [](whisper_context_params& /*p*/, const JSValueVariant& v) { + // Consumed directly from whisperContextCfg["model"] in BCIModel::load. + (void)toString(v, "model"); + }}, + }; + return handlers; +} + +whisper_full_params toWhisperFullParams(BCIConfig& bciConfig) { + whisper_full_params params = whisper_full_default_params( + WHISPER_SAMPLING_BEAM_SEARCH); + + // BCI defaults matching the Python notebook's decode settings + params.beam_search.beam_size = 4; + params.suppress_nst = false; + params.suppress_blank = false; + params.temperature = 0.0F; + params.no_timestamps = true; + params.single_segment = true; + params.no_context = true; + params.length_penalty = 0.14F; + params.max_initial_ts = 0; + + const auto& handlers = getWhisperMainHandlers(); + for (const auto& [key, value] : bciConfig.whisperMainCfg) { + auto it = handlers.find(key); + if (it == handlers.end()) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Unknown whisperConfig key: " + key); + } + try { + it->second(params, value); + } catch (const qvac_errors::StatusError&) { + throw; + } catch (const std::exception& e) { + throw 
qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "error in whisperConfig handler: " + key + " | " + e.what()); + } + } + + auto langIt = bciConfig.whisperMainCfg.find("language"); + if (langIt != bciConfig.whisperMainCfg.end()) { + if (auto* s = std::get_if(&langIt->second)) { + bciConfig.lang_ = *s; + params.language = bciConfig.lang_.c_str(); + } + } + + return params; +} + +whisper_context_params toWhisperContextParams(const BCIConfig& bciConfig) { + whisper_context_params params = whisper_context_default_params(); + + const auto& handlers = getWhisperContextHandlers(); + for (const auto& [key, value] : bciConfig.whisperContextCfg) { + auto it = handlers.find(key); + if (it == handlers.end()) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Unknown contextParams key: " + key); + } + try { + it->second(params, value); + } catch (const qvac_errors::StatusError&) { + throw; + } catch (const std::exception& e) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "error in contextParams handler: " + key + " | " + e.what()); + } + } + + return params; +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp new file mode 100644 index 0000000000..df1b0ac75c --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace qvac_lib_inference_addon_bci { + +using JSValueVariant = + std::variant; + +template +using HandlerFunction = std::function; + +template +using HandlersMap = std::unordered_map>; + +struct BCIConfig { + std::map miscConfig; + std::map whisperMainCfg; + std::map whisperContextCfg; + std::map bciConfig; + + // Owned storage for string values that whisper_full_params 
references by + // pointer (e.g. p.language = lang_.c_str()). Must outlive the params struct. + mutable std::string lang_; +}; + +whisper_full_params toWhisperFullParams(BCIConfig& bciConfig); +whisper_context_params toWhisperContextParams(const BCIConfig& bciConfig); + +std::string convertVariantToString(const JSValueVariant& value); + +// Maps of handler functions for setting whisper_full_params fields from JS. +const HandlersMap& getWhisperMainHandlers(); +const HandlersMap& getWhisperContextHandlers(); + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.cpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.cpp new file mode 100644 index 0000000000..9ef986b98c --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.cpp @@ -0,0 +1,392 @@ +#include "BCIModel.hpp" + +#include +#include +#include +#include +#include +#include + +#include "BCIConfig.hpp" +#include "addon/BCIErrors.hpp" +#include "model-interface/BCITypes.hpp" +#include "qvac-lib-inference-addon-cpp/Errors.hpp" +#include "qvac-lib-inference-addon-cpp/Logger.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace { +constexpr float K_SEGMENT_TIMESTAMP_SCALE = 0.01F; +constexpr int K_WARMUP_SAMPLE_COUNT = 8000; +constexpr int K_DUMMY_AUDIO_30S = 16000 * 30; +} // namespace + +static bool shouldAbortWhisper(void* userData) { + const auto* cancelRequested = static_cast(userData); + return cancelRequested != nullptr && + cancelRequested->load(std::memory_order_relaxed); +} + +// Called right before the encoder runs. Replaces the mel spectrogram +// (computed from dummy silence) with our neural-signal-derived features. 
+static bool onEncoderBegin( + whisper_context* ctx, whisper_state* state, void* userData) { + auto* cbData = static_cast(userData); + if (cbData == nullptr || cbData->melData == nullptr) { + return true; + } + + int result = whisper_set_mel_with_state( + cbData->ctx, state, + cbData->melData, cbData->melFrames, cbData->melBins); + + if (result != 0) { + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::ERROR, + "whisper_set_mel_with_state failed: " + std::to_string(result)); + return false; + } + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG, + "Injected neural mel features: " + + std::to_string(cbData->melFrames) + " frames x " + + std::to_string(cbData->melBins) + " bins"); + return true; +} + +BCIModel::BCIModel(BCIConfig config) + : cfg_(std::move(config)), neuralProcessor_() {} + +BCIModel::~BCIModel() noexcept { + try { + unload(); + } catch (...) { + is_loaded_ = false; + } +} + +void BCIModel::loadEmbedderIfNeeded() { + if (neuralProcessor_.hasWeights()) { + return; + } + + // Look for embedder weights next to the model file + auto modelPathIt = cfg_.whisperContextCfg.find("model"); + if (modelPathIt == cfg_.whisperContextCfg.end()) { + return; + } + const auto modelPath = std::get(modelPathIt->second); + + auto lastSep = modelPath.find_last_of("/\\"); + auto dir = (lastSep != std::string::npos) + ? modelPath.substr(0, lastSep) + : "."; + auto embedderPath = dir + "/bci-embedder.bin"; + + if (neuralProcessor_.loadEmbedderWeights(embedderPath)) { + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loaded BCI embedder weights from: " + embedderPath); + } else { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::EmbedderWeightsNotFound, + "BCI embedder weights not found at: " + embedderPath + + ". This file is required for neural signal preprocessing. 
" + "Generate it with: python3 scripts/convert-model.py --checkpoint "); + } +} + +void BCIModel::load() { + if (ctx_) return; + + whisper_context_params contextParams = toWhisperContextParams(cfg_); + + const auto modelPathIt = cfg_.whisperContextCfg.find("model"); + if (modelPathIt == cfg_.whisperContextCfg.end()) { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Model path not specified in contextParams"); + } + const auto modelPath = std::get(modelPathIt->second); + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loading BCI model from: " + modelPath); + + auto* rawCtx = whisper_init_from_file_with_params(modelPath.c_str(), contextParams); + if (rawCtx == nullptr) { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::FailedToLoadModel, + "Failed to initialize Whisper context from: " + modelPath); + } + + try { + ctx_.reset(rawCtx); + loadEmbedderIfNeeded(); + if (!is_warmed_up_) { + warmup(); + is_warmed_up_ = true; + } + is_loaded_ = true; + } catch (...) { + ctx_.reset(); + is_loaded_ = false; + throw; + } +} + +void BCIModel::unload() { + resetContext(); + is_loaded_ = false; + is_warmed_up_ = false; +} + +void BCIModel::reload() { + unload(); + load(); +} + +void BCIModel::reset() { + output_.clear(); + totalTokens_ = 0; + totalSegments_ = 0; + processCalls_ = 0; + totalWallMs_ = 0.0; + whisperSampleMs_ = 0.0; + whisperEncodeMs_ = 0.0; + whisperDecodeMs_ = 0.0; + whisperBatchdMs_ = 0.0; + whisperPromptMs_ = 0.0; +} + +qvac_lib_inference_addon_cpp::RuntimeStats BCIModel::runtimeStats() const { + qvac_lib_inference_addon_cpp::RuntimeStats stats; + + const double totalTimeSec = totalWallMs_ / 1000.0; + const double tps = totalTimeSec > 0.0 + ? 
(static_cast(totalTokens_) / totalTimeSec) + : 0.0; + + stats.emplace_back("totalTime", totalTimeSec); + stats.emplace_back("tokensPerSecond", tps); + stats.emplace_back("totalTokens", totalTokens_); + stats.emplace_back("totalSegments", totalSegments_); + stats.emplace_back("processCalls", processCalls_); + stats.emplace_back("totalWallMs", totalWallMs_); + stats.emplace_back("whisperSampleMs", whisperSampleMs_); + stats.emplace_back("whisperEncodeMs", whisperEncodeMs_); + stats.emplace_back("whisperDecodeMs", whisperDecodeMs_); + stats.emplace_back("whisperBatchdMs", whisperBatchdMs_); + stats.emplace_back("whisperPromptMs", whisperPromptMs_); + return stats; +} + +static void onNewSegment( + [[maybe_unused]] whisper_context* ctx, whisper_state* state, int nNew, + void* userData) { + auto* bci = static_cast(userData); + if (bci == nullptr || state == nullptr) return; + + const int nSegments = whisper_full_n_segments_from_state(state); + if (nNew <= 0 || nSegments <= 0) return; + const int startIndex = std::max(0, nSegments - nNew); + + for (int i = startIndex; i < nSegments; i++) { + Transcript transcript; + const char* text = whisper_full_get_segment_text_from_state(state, i); + transcript.text = text != nullptr ? 
text : ""; + transcript.start = + static_cast(whisper_full_get_segment_t0_from_state(state, i)) * + K_SEGMENT_TIMESTAMP_SCALE; + transcript.end = + static_cast(whisper_full_get_segment_t1_from_state(state, i)) * + K_SEGMENT_TIMESTAMP_SCALE; + transcript.id = i; + + bci->emitSegment(transcript); + bci->addTranscription(transcript); + + const int nTokens = whisper_full_n_tokens_from_state(state, i); + bci->recordSegmentStats(nTokens); + } +} + +void BCIModel::warmup() { + if (!ctx_) return; + + std::vector silentAudio(K_WARMUP_SAMPLE_COUNT, 0.0F); + whisper_full_params params = toWhisperFullParams(cfg_); + params.new_segment_callback = nullptr; + params.new_segment_callback_user_data = nullptr; + + whisper_full(ctx_.get(), params, + silentAudio.data(), + static_cast(silentAudio.size())); +} + +void BCIModel::process(const Input& rawNeuralData) { + if (ctx_ == nullptr) { + throw std::runtime_error("BCI Whisper context is not initialized โ€” call load() first"); + } + + if (cancelRequested_.load(std::memory_order_relaxed)) { + throw std::runtime_error("Job cancelled"); + } + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG, + "Processing neural signal (" + + std::to_string(rawNeuralData.size()) + " bytes)"); + + // Default day_idx = 0 matches NeuralProcessor::processToMel and the public + // JS/TS docs. The reference fixtures in test/fixtures/manifest.json pass + // day_idx=1 explicitly; callers that omit bciConfig get day 0. 
+ int dayIdx = 0; + auto it = cfg_.bciConfig.find("day_idx"); + if (it != cfg_.bciConfig.end()) { + if (auto* d = std::get_if(&it->second)) { + dayIdx = static_cast(*d); + } else if (auto* i = std::get_if(&it->second)) { + dayIdx = *i; + } + } + + if (neuralProcessor_.hasWeights()) { + const int maxDay = + static_cast(neuralProcessor_.getNumDays()) - 1; + if (maxDay >= 0 && (dayIdx < 0 || dayIdx > maxDay)) { + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::WARNING, + "day_idx " + std::to_string(dayIdx) + + " is outside [0, " + std::to_string(maxDay) + + "]; it will be clamped"); + } + } + + auto melFeatures = neuralProcessor_.processToMel(rawNeuralData, dayIdx); + const int melBins = neuralProcessor_.getMelBins(); + const int melFrames = neuralProcessor_.getMelFrames(); + + processCalls_ += 1; + + whisper_reset_timings(ctx_.get()); + + const auto startTime = std::chrono::steady_clock::now(); + + EncoderCallbackData cbData; + cbData.ctx = ctx_.get(); + cbData.melData = melFeatures.data(); + cbData.melFrames = melFrames; + cbData.melBins = melBins; + + whisper_full_params params = toWhisperFullParams(cfg_); + params.new_segment_callback = onNewSegment; + params.new_segment_callback_user_data = this; + params.abort_callback = shouldAbortWhisper; + params.abort_callback_user_data = &cancelRequested_; + params.encoder_begin_callback = onEncoderBegin; + params.encoder_begin_callback_user_data = &cbData; + + if (dummyAudioPad_.size() != static_cast(K_DUMMY_AUDIO_30S)) { + dummyAudioPad_.assign(K_DUMMY_AUDIO_30S, 0.0F); + } + + int result = whisper_full( + ctx_.get(), params, + dummyAudioPad_.data(), static_cast(dummyAudioPad_.size())); + + const auto endTime = std::chrono::steady_clock::now(); + totalWallMs_ += + std::chrono::duration(endTime - startTime).count(); + + if (auto* whisperTimings = whisper_get_timings(ctx_.get()); + whisperTimings != nullptr) { + whisperSampleMs_ += whisperTimings->sample_ms; + whisperEncodeMs_ += whisperTimings->encode_ms; + 
whisperDecodeMs_ += whisperTimings->decode_ms; + whisperBatchdMs_ += whisperTimings->batchd_ms; + whisperPromptMs_ += whisperTimings->prompt_ms; + } + + if (result != 0) { + if (cancelRequested_.load(std::memory_order_relaxed)) { + throw std::runtime_error("Job cancelled"); + } + throw std::runtime_error( + "Failed to process neural signal (whisper_full returned " + + std::to_string(result) + ")"); + } +} + +std::any BCIModel::process(const std::any& input) { + AnyInput modelInput; + if (auto* anyInput = std::any_cast( + const_cast(&input))) { + modelInput = std::move(*anyInput); + } else if (auto* inputVector = std::any_cast( + const_cast(&input))) { + modelInput.input = std::move(*inputVector); + } else { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + std::string("Invalid input type for BCIModel::process: ") + + input.type().name()); + } + + const auto previousOutputCallback = on_segment_; + const bool shouldOverrideCallback = + static_cast(modelInput.outputCallback); + if (shouldOverrideCallback) { + on_segment_ = modelInput.outputCallback; + } + + // Clear the cancel flag FIRST so a cancel() call that races with reset() + // is not silently lost. process(Input&) still checks cancelRequested_ at + // the top, so a cancel that arrives between these two statements aborts + // the upcoming whisper_full call via shouldAbortWhisper. + cancelRequested_.store(false, std::memory_order_relaxed); + reset(); + try { + process(modelInput.input); + } catch (...) 
{ + if (shouldOverrideCallback) { + on_segment_ = previousOutputCallback; + } + throw; + } + + if (shouldOverrideCallback) { + on_segment_ = previousOutputCallback; + } + + return output_; +} + +void BCIModel::saveLoadParams(const BCIConfig& config) { + setConfig(config); +} + +void BCIModel::cancel() const { + cancelRequested_.store(true, std::memory_order_relaxed); +} + +bool BCIModel::configContextIsChanged( + const BCIConfig& oldCfg, const BCIConfig& newCfg) { + const std::vector contextKeys = { + "model", "use_gpu", "flash_attn", "gpu_device"}; + return std::ranges::any_of(contextKeys, [&](const std::string& key) { + const auto oldIt = oldCfg.whisperContextCfg.find(key); + const auto newIt = newCfg.whisperContextCfg.find(key); + if (oldIt != oldCfg.whisperContextCfg.end() && + newIt != newCfg.whisperContextCfg.end()) { + return oldIt->second != newIt->second; + } + return (oldIt != oldCfg.whisperContextCfg.end()) != + (newIt != newCfg.whisperContextCfg.end()); + }); +} + +void BCIModel::resetContext() { ctx_.reset(); } + +void BCIModel::setConfig(const BCIConfig& config) { + bool contextChanged = configContextIsChanged(cfg_, config); + cfg_ = config; + if (contextChanged) reload(); +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp new file mode 100644 index 0000000000..7c02e176f5 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "BCIConfig.hpp" +#include "NeuralProcessor.hpp" +#include "model-interface/BCITypes.hpp" +#include "qvac-lib-inference-addon-cpp/ModelInterfaces.hpp" +#include "qvac-lib-inference-addon-cpp/RuntimeStats.hpp" + +namespace qvac_lib_inference_addon_bci { + +class BCIModel + : public qvac_lib_inference_addon_cpp::model::IModel, + public 
qvac_lib_inference_addon_cpp::model::IModelCancel, + public qvac_lib_inference_addon_cpp::model::IModelAsyncLoad { +public: + using OutputCallback = std::function; + using ValueType = float; + using Input = std::vector; + using Output = std::vector; + + struct AnyInput { + Input input; + OutputCallback outputCallback = nullptr; + }; + + // Data passed to encoder_begin_callback so it can inject mel features. + struct EncoderCallbackData { + whisper_context* ctx = nullptr; + const float* melData = nullptr; + int melFrames = 0; + int melBins = 0; + }; + + explicit BCIModel(BCIConfig config); + ~BCIModel() noexcept; + + void initializeBackend() {} + void setConfig(const BCIConfig& config); + + auto setOnSegmentCallback(const OutputCallback& callback) -> void { + on_segment_ = callback; + } + auto addTranscription(const Transcript& transcript) -> void { + output_.push_back(transcript); + } + auto hasSegmentCallback() const -> bool { + return static_cast(on_segment_); + } + auto emitSegment(const Transcript& transcript) -> void { + if (on_segment_) { + on_segment_(transcript); + } + } + + std::string getName() const override { return "BCIModel"; } + std::any process(const std::any& input) override; + void cancel() const override; + + void process(const Input& input); + + void load(); + void unload(); + void unloadWeights() { unload(); } + void reload(); + void reset(); + void waitForLoadInitialization() override { load(); } + void setWeightsForFile( + const std::string&, + std::unique_ptr>&&) override {} + bool isLoaded() const { return is_loaded_; } + qvac_lib_inference_addon_cpp::RuntimeStats runtimeStats() const override; + void warmup(); + + void saveLoadParams(const BCIConfig& config); + template + std::enable_if_t, BCIConfig>, void> + saveLoadParams(T&&, Args&&...) 
{} + + void recordSegmentStats(int nTokens) { + totalSegments_ += 1; + if (nTokens > 0) { + totalTokens_ += static_cast(nTokens); + } + } + +private: + static bool configContextIsChanged( + const BCIConfig& oldCfg, const BCIConfig& newCfg); + void resetContext(); + void loadEmbedderIfNeeded(); + + BCIConfig cfg_; + NeuralProcessor neuralProcessor_; + OutputCallback on_segment_; + Output output_; + + struct WhisperContextDeleter { + void operator()(whisper_context* ctx) const noexcept { + if (ctx != nullptr) { + whisper_free(ctx); + } + } + }; + + std::unique_ptr ctx_{nullptr}; + bool is_loaded_ = false; + bool is_warmed_up_ = false; + + int64_t totalTokens_ = 0; + int64_t totalSegments_ = 0; + int64_t processCalls_ = 0; + double totalWallMs_ = 0.0; + + // whisper.cpp internal stage timings aggregated across process() calls. + double whisperSampleMs_ = 0.0; + double whisperEncodeMs_ = 0.0; + double whisperDecodeMs_ = 0.0; + double whisperBatchdMs_ = 0.0; + double whisperPromptMs_ = 0.0; + + // 30 s of silent audio reused on every process() call; whisper.cpp does + // the actual encode via our encoder_begin_callback, but it still requires + // a padding buffer of the right shape. Hoisted to a member so we don't + // reallocate ~1.9 MB per call. 
+ std::vector dummyAudioPad_; + + mutable std::atomic_bool cancelRequested_{false}; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp new file mode 100644 index 0000000000..a4448e464a --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp @@ -0,0 +1,321 @@ +#include "NeuralProcessor.hpp" + +#include +#include +#include +#include +#include + +#include "addon/BCIErrors.hpp" +#include "qvac-lib-inference-addon-cpp/Logger.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace { +constexpr size_t K_HEADER_BYTES = 8; +constexpr uint32_t K_EMBEDDER_MAGIC = 0x42434945; + +// Kernel-trim threshold used by gaussianSmooth: values below this are +// considered numerically negligible and trimmed from the ends of the kernel +// so the convolution loop touches fewer source timesteps. Matches the +// BrainWhisperer Python reference. +constexpr float K_KERNEL_TRIM_THRESHOLD = 0.01F; + +// Default Gaussian smoothing parameters matching the BrainWhisperer Python +// notebook. These are the ฯƒ and kernel width used for temporal smoothing of +// the raw neural signal before day-projection and mel padding. +constexpr float K_SMOOTH_KERNEL_STD = 2.0F; +constexpr int K_SMOOTH_KERNEL_SIZE = 100; + +bool hasExpectedSize(const std::vector& vec, size_t expected) { + return vec.size() == expected; +} +} // namespace + +NeuralProcessor::NeuralProcessor() = default; + +bool NeuralProcessor::loadEmbedderWeights(const std::string& path) { + std::ifstream f(path, std::ios::binary); + if (!f.is_open()) return false; + + // A truncated/corrupt embedder file would otherwise silently load as + // zeros and produce garbage output at inference time. 
Check f.good() + // after every read and bail out cleanly so the caller reports the file + // as missing / invalid instead of the model emitting nonsense. + bool readFailed = false; + + auto readU32 = [&]() -> uint32_t { + uint32_t v = 0; + f.read(reinterpret_cast(&v), sizeof(v)); + if (!f) readFailed = true; + return v; + }; + auto readFloats = [&](size_t count) -> std::vector { + std::vector data(count); + if (count > 0) { + f.read(reinterpret_cast(data.data()), + static_cast(count * sizeof(float))); + if (!f) readFailed = true; + } + return data; + }; + auto readInts = [&](size_t count) -> std::vector { + std::vector data(count); + if (count > 0) { + f.read(reinterpret_cast(data.data()), + static_cast(count * sizeof(int32_t))); + if (!f) readFailed = true; + } + return data; + }; + + if (readU32() != K_EMBEDDER_MAGIC || readU32() != 1 || readFailed) { + return false; + } + + weights_.numFeatures = readU32(); + /*embedDim=*/ readU32(); + /*kernelSize1=*/ readU32(); + /*kernelSize2=*/ readU32(); + /*stride2=*/ readU32(); + weights_.numDays = readU32(); + weights_.numMonths = readU32(); + weights_.r = readU32(); + if (readFailed) return false; + + // Skip conv1/conv2 weights (handled by GGML model) + uint32_t n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + if (readFailed) return false; + + n = readU32(); + weights_.sessionToDayMap = readInts(n); + if (readFailed) return false; + + weights_.dayAs.resize(weights_.numDays); + weights_.dayBs.resize(weights_.numDays); + weights_.dayBiases.resize(weights_.numDays); + for (uint32_t i = 0; i < weights_.numDays; ++i) { + n = readU32(); weights_.dayAs[i] = readFloats(n); + n = readU32(); weights_.dayBs[i] = readFloats(n); + n = readU32(); weights_.dayBiases[i] = readFloats(n); + if (readFailed) return false; + } + + weights_.monthWeights.resize(weights_.numMonths); + weights_.monthBiases.resize(weights_.numMonths); + for (uint32_t i = 0; i < 
weights_.numMonths; ++i) { + n = readU32(); weights_.monthWeights[i] = readFloats(n); + n = readU32(); weights_.monthBiases[i] = readFloats(n); + if (readFailed) return false; + } + + const size_t nf = static_cast(weights_.numFeatures); + const size_t r = static_cast(weights_.r); + const size_t expectedDayA = nf * r; + const size_t expectedDayB = r * nf; + const size_t expectedDayBias = nf; + const size_t expectedMonthW = nf * nf; + + for (uint32_t i = 0; i < weights_.numDays; ++i) { + if (!hasExpectedSize(weights_.dayAs[i], expectedDayA) || + !hasExpectedSize(weights_.dayBs[i], expectedDayB) || + !hasExpectedSize(weights_.dayBiases[i], expectedDayBias)) { + return false; + } + } + + for (uint32_t i = 0; i < weights_.numMonths; ++i) { + if (!hasExpectedSize(weights_.monthWeights[i], expectedMonthW) || + !hasExpectedSize(weights_.monthBiases[i], expectedDayBias)) { + return false; + } + } + + weights_.loaded = true; + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loaded day projection weights: " + + std::to_string(weights_.numDays) + " days, r=" + + std::to_string(weights_.r)); + return true; +} + +std::vector NeuralProcessor::gaussianSmooth( + const std::vector& data, + uint32_t numTimesteps, uint32_t numChannels, + float kernelStd, int kernelSize) { + + std::vector kernel(kernelSize); + const int center = kernelSize / 2; + float sum = 0.0F; + for (int i = 0; i < kernelSize; ++i) { + float x = static_cast(i - center); + kernel[i] = std::exp(-0.5F * (x * x) / (kernelStd * kernelStd)); + sum += kernel[i]; + } + for (auto& k : kernel) k /= sum; + + int start = 0, end = kernelSize - 1; + while (start < end && kernel[start] < K_KERNEL_TRIM_THRESHOLD) ++start; + while (end > start && kernel[end] < K_KERNEL_TRIM_THRESHOLD) --end; + std::vector trimK(kernel.begin() + start, kernel.begin() + end + 1); + const int halfK = static_cast(trimK.size()) / 2; + + std::vector result(data.size()); + for (uint32_t c = 0; c < numChannels; ++c) { + for (uint32_t t = 0; t 
< numTimesteps; ++t) { + float val = 0.0F; + for (int k = 0; k < static_cast(trimK.size()); ++k) { + int srcT = static_cast(t) + k - halfK; + if (srcT >= 0 && srcT < static_cast(numTimesteps)) + val += data[srcT * numChannels + c] * trimK[k]; + } + result[t * numChannels + c] = val; + } + } + return result; +} + +std::vector NeuralProcessor::applyDayProjection( + const std::vector& features, + uint32_t numTimesteps, uint32_t numChannels, int dayIdx) const { + + if (!weights_.loaded || weights_.r == 0) return features; + + const uint32_t nf = weights_.numFeatures; + const uint32_t r = weights_.r; + int di = std::clamp(dayIdx, 0, static_cast(weights_.numDays) - 1); + + // Rebuild the dense projection only when the resolved day index changes. + // Materializing dayDelta + W costs O(nf*nf*r) + O(nf*nf); for nf=512,r=8 + // that is ~2M + 0.25M multiplies per recompute. + if (di != cachedDayIdx_ || + cachedProjectionW_.size() != static_cast(nf) * nf || + cachedProjectionBias_.size() != nf) { + const auto& dayA = weights_.dayAs[di]; + const auto& dayB = weights_.dayBs[di]; + const auto& dayBias = weights_.dayBiases[di]; + + cachedProjectionW_.assign(static_cast(nf) * nf, 0.0F); + cachedProjectionBias_.assign(nf, 0.0F); + + for (uint32_t i = 0; i < nf; ++i) { + for (uint32_t j = 0; j < nf; ++j) { + float s = 0.0F; + for (uint32_t k = 0; k < r; ++k) { + s += dayA[i * r + k] * dayB[k * nf + j]; + } + cachedProjectionW_[i * nf + j] = s; + } + } + + int monthIdx = di / 30; + bool hasMonth = + (monthIdx < static_cast(weights_.monthWeights.size()) && + !weights_.monthWeights[monthIdx].empty()); + if (hasMonth) { + const auto& mw = weights_.monthWeights[monthIdx]; + for (uint32_t i = 0; i < nf * nf; ++i) { + cachedProjectionW_[i] += mw[i]; + } + } + + for (uint32_t i = 0; i < nf; ++i) { + cachedProjectionBias_[i] = dayBias[i]; + if (hasMonth && i < weights_.monthBiases[monthIdx].size()) { + cachedProjectionBias_[i] += weights_.monthBiases[monthIdx][i]; + } + } + + cachedDayIdx_ = 
di; + } + + const auto& W = cachedProjectionW_; + const auto& bias = cachedProjectionBias_; + + // Python: output[t,k] = softsign(sum_d(features[t,d] * W[d,k]) + bias[k]) + // i.e. output = features @ W + bias (right-multiply by W) + std::vector output(numTimesteps * nf); + for (uint32_t t = 0; t < numTimesteps; ++t) { + for (uint32_t k = 0; k < nf; ++k) { + float s = bias[k]; + for (uint32_t d = 0; d < nf; ++d) { + s += features[t * numChannels + d] * W[d * nf + k]; + } + output[t * nf + k] = s / (1.0F + std::abs(s)); + } + } + + return output; +} + +std::vector NeuralProcessor::processToMel( + const std::vector& rawData, int dayIdx) const { + + if (rawData.size() < K_HEADER_BYTES) { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::InvalidNeuralSignal, + "Neural signal buffer too small"); + } + + uint32_t numTimesteps = 0, numChannels = 0; + std::memcpy(&numTimesteps, rawData.data(), sizeof(uint32_t)); + std::memcpy(&numChannels, rawData.data() + sizeof(uint32_t), sizeof(uint32_t)); + + size_t expectedBytes = static_cast(numTimesteps) * numChannels * sizeof(float); + if (rawData.size() < K_HEADER_BYTES + expectedBytes) { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::InvalidNeuralSignal, + "Neural signal buffer truncated"); + } + + std::vector features(numTimesteps * numChannels); + std::memcpy(features.data(), rawData.data() + K_HEADER_BYTES, expectedBytes); + + // Passthrough mode: if dayIdx == -1, skip preprocessing and treat + // the input as pre-computed mel features in frame-major layout. 
+ if (dayIdx == -1) { + const int melBins = K_WHISPER_N_MEL; + const int melFrames = K_WHISPER_MEL_FRAMES; + std::vector melOutput(melFrames * melBins, 0.0F); + uint32_t framesToCopy = std::min(numTimesteps, static_cast(melFrames)); + uint32_t chToCopy = std::min(numChannels, static_cast(melBins)); + for (uint32_t t = 0; t < framesToCopy; ++t) + for (uint32_t c = 0; c < chToCopy; ++c) + melOutput[c * melFrames + t] = features[t * numChannels + c]; + return melOutput; + } + + auto smoothed = gaussianSmooth( + features, numTimesteps, numChannels, + K_SMOOTH_KERNEL_STD, K_SMOOTH_KERNEL_SIZE); + + // Step 2: Day projection (if available) + std::vector projected; + uint32_t projChannels = numChannels; + if (weights_.loaded && weights_.r > 0) { + projected = applyDayProjection(smoothed, numTimesteps, numChannels, dayIdx); + projChannels = weights_.numFeatures; + } else { + projected = smoothed; + } + + // Step 3: Pad to 3000 frames at 512 channels for whisper_set_mel() + // whisper.cpp stores mel as mel.data[mel_bin * n_len + frame] (mel-major), + // so we must write in that layout for whisper_set_mel_with_state. 
+ const int melBins = K_WHISPER_N_MEL; + const int melFrames = K_WHISPER_MEL_FRAMES; + std::vector melOutput(melFrames * melBins, 0.0F); + + uint32_t framesToCopy = std::min(numTimesteps, static_cast(melFrames)); + uint32_t chToCopy = std::min(projChannels, static_cast(melBins)); + for (uint32_t t = 0; t < framesToCopy; ++t) + for (uint32_t c = 0; c < chToCopy; ++c) + melOutput[c * melFrames + t] = projected[t * projChannels + c]; + + return melOutput; +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp new file mode 100644 index 0000000000..e43d5716fc --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include + +namespace qvac_lib_inference_addon_bci { + +// Preprocesses raw multi-channel neural signals for whisper.cpp. +// +// Pipeline: neural(512ch) โ†’ smooth โ†’ day_proj โ†’ pad to 3000 frames +// Output is 512-dim x 3000 frames, fed to whisper_set_mel(). 
+// whisper.cpp (patched) handles: conv1(512โ†’384,k=7) โ†’ GELU โ†’ conv2 โ†’ GELU +// โ†’ positional_embedding โ†’ 6-layer transformer โ†’ LoRA-merged decoder โ†’ text +class NeuralProcessor { +public: + static constexpr int K_WHISPER_N_MEL = 512; // n_mels in GGML model + static constexpr int K_WHISPER_MEL_FRAMES = 3000; + + struct EmbedderWeights { + bool loaded = false; + uint32_t numFeatures = 512; + uint32_t numDays = 0; + uint32_t numMonths = 0; + uint32_t r = 0; + + std::vector sessionToDayMap; + std::vector> dayAs; + std::vector> dayBs; + std::vector> dayBiases; + std::vector> monthWeights; + std::vector> monthBiases; + }; + + NeuralProcessor(); + + bool loadEmbedderWeights(const std::string& path); + + std::vector processToMel( + const std::vector& rawData, + int dayIdx = 0) const; + + static std::vector gaussianSmooth( + const std::vector& data, + uint32_t numTimesteps, uint32_t numChannels, + float kernelStd = 2.0F, int kernelSize = 100); + + std::vector applyDayProjection( + const std::vector& features, + uint32_t numTimesteps, uint32_t numChannels, + int dayIdx) const; + + bool hasWeights() const { return weights_.loaded; } + uint32_t getNumDays() const { return weights_.numDays; } + int getMelBins() const { return K_WHISPER_N_MEL; } + int getMelFrames() const { return K_WHISPER_MEL_FRAMES; } + +private: + EmbedderWeights weights_; + + // Memoized dense projection (W, bias) per resolved day index. The + // underlying low-rank dayA ยท dayB + month correction is O(nf*nf*r) to + // materialize; caching makes same-day batch inference much cheaper. 
+ mutable int cachedDayIdx_ = -1; + mutable std::vector cachedProjectionW_; + mutable std::vector cachedProjectionBias_; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/tests/test_core.cpp b/packages/bci-whispercpp/addon/tests/test_core.cpp new file mode 100644 index 0000000000..40466e5a9e --- /dev/null +++ b/packages/bci-whispercpp/addon/tests/test_core.cpp @@ -0,0 +1,195 @@ +#include +#include +#include + +#include + +#include "model-interface/bci/NeuralProcessor.hpp" +#include "model-interface/bci/BCIConfig.hpp" + +using namespace qvac_lib_inference_addon_bci; + +namespace { + +std::vector createTestSignal(uint32_t numTimesteps, uint32_t numChannels) { + const size_t headerSize = 2 * sizeof(uint32_t); + const size_t dataSize = numTimesteps * numChannels * sizeof(float); + std::vector buffer(headerSize + dataSize); + + std::memcpy(buffer.data(), &numTimesteps, sizeof(uint32_t)); + std::memcpy(buffer.data() + sizeof(uint32_t), &numChannels, sizeof(uint32_t)); + + auto* data = reinterpret_cast(buffer.data() + headerSize); + for (uint32_t t = 0; t < numTimesteps; ++t) { + for (uint32_t c = 0; c < numChannels; ++c) { + data[t * numChannels + c] = + static_cast(t) / static_cast(numTimesteps) * + std::sin(static_cast(c) * 0.1F); + } + } + return buffer; +} + +} // namespace + +TEST(NeuralProcessor, ProcessToMelProducesCorrectShape) { + NeuralProcessor processor; + auto signal = createTestSignal(100, 512); + auto result = processor.processToMel(signal); + + EXPECT_EQ(result.size(), + static_cast(NeuralProcessor::K_WHISPER_MEL_FRAMES) * + NeuralProcessor::K_WHISPER_N_MEL); +} + +TEST(NeuralProcessor, ProcessToMelRejectsSmallBuffer) { + NeuralProcessor processor; + std::vector tooSmall = {1, 2, 3}; + EXPECT_THROW(processor.processToMel(tooSmall), std::exception); +} + +TEST(NeuralProcessor, GaussianSmoothPreservesSize) { + uint32_t T = 50, C = 8; + std::vector data(T * C, 1.0F); + auto smoothed = 
NeuralProcessor::gaussianSmooth(data, T, C, 2.0F, 20); + EXPECT_EQ(smoothed.size(), data.size()); +} + +TEST(NeuralProcessor, GaussianSmoothReducesNoise) { + uint32_t T = 100, C = 4; + std::vector data(T * C); + for (uint32_t t = 0; t < T; ++t) + for (uint32_t c = 0; c < C; ++c) + data[t * C + c] = (t % 2 == 0) ? 1.0F : -1.0F; + + auto smoothed = NeuralProcessor::gaussianSmooth(data, T, C, 2.0F, 20); + + float origVar = 0, smoothVar = 0; + for (size_t i = 0; i < data.size(); ++i) { + origVar += data[i] * data[i]; + smoothVar += smoothed[i] * smoothed[i]; + } + EXPECT_LT(smoothVar, origVar); +} + +TEST(NeuralProcessor, OutputValuesAreFinite) { + NeuralProcessor processor; + auto signal = createTestSignal(50, 512); + auto result = processor.processToMel(signal); + for (const auto& sample : result) { + EXPECT_TRUE(std::isfinite(sample)); + } +} + +TEST(NeuralProcessor, PaddedFramesAreZero) { + NeuralProcessor processor; + auto signal = createTestSignal(50, 512); + auto result = processor.processToMel(signal); + + float lastFrameSum = 0; + int lastFrame = NeuralProcessor::K_WHISPER_MEL_FRAMES - 1; + // mel output is mel-major: data[bin * n_frames + frame] + for (int m = 0; m < NeuralProcessor::K_WHISPER_N_MEL; ++m) { + lastFrameSum += std::abs(result[m * NeuralProcessor::K_WHISPER_MEL_FRAMES + lastFrame]); + } + EXPECT_FLOAT_EQ(lastFrameSum, 0.0F); +} + +TEST(BCIConfig, DefaultWhisperFullParamsAreValid) { + BCIConfig config; + config.whisperMainCfg["language"] = std::string("en"); + auto params = toWhisperFullParams(config); + EXPECT_STREQ(params.language, "en"); +} + +TEST(BCIConfig, UnknownWhisperKeyIsRejected) { + BCIConfig config; + config.whisperMainCfg["not_a_real_key"] = true; + EXPECT_THROW(toWhisperFullParams(config), std::exception); +} + +TEST(BCIConfig, UnknownContextKeyIsRejected) { + BCIConfig config; + config.whisperContextCfg["nope"] = std::string("value"); + EXPECT_THROW(toWhisperContextParams(config), std::exception); +} + +TEST(BCIConfig, 
NumericDoubleCoercedToInt) { + BCIConfig config; + config.whisperMainCfg["n_threads"] = 4.0; + config.whisperMainCfg["duration_ms"] = 100.0; + auto params = toWhisperFullParams(config); + EXPECT_EQ(params.n_threads, 4); + EXPECT_EQ(params.duration_ms, 100); +} + +TEST(BCIConfig, NegativeNThreadsRejected) { + BCIConfig config; + config.whisperMainCfg["n_threads"] = -1.0; + EXPECT_THROW(toWhisperFullParams(config), std::exception); +} + +TEST(BCIConfig, NegativeDurationMsRejected) { + BCIConfig config; + config.whisperMainCfg["duration_ms"] = -5.0; + EXPECT_THROW(toWhisperFullParams(config), std::exception); +} + +TEST(BCIConfig, TemperatureOutOfRangeRejected) { + BCIConfig config; + config.whisperMainCfg["temperature"] = 3.5; + EXPECT_THROW(toWhisperFullParams(config), std::exception); +} + +TEST(BCIConfig, BeamSizeOutOfRangeRejected) { + BCIConfig config; + config.whisperMainCfg["beam_search_beam_size"] = 0.0; + EXPECT_THROW(toWhisperFullParams(config), std::exception); + BCIConfig big; + big.whisperMainCfg["beam_search_beam_size"] = 100.0; + EXPECT_THROW(toWhisperFullParams(big), std::exception); +} + +TEST(BCIConfig, ContextGpuDeviceMustBeNonNegative) { + BCIConfig config; + config.whisperContextCfg["gpu_device"] = -1.0; + EXPECT_THROW(toWhisperContextParams(config), std::exception); +} + +TEST(BCIConfig, ContextBooleanHandlersWireThrough) { + BCIConfig config; + config.whisperContextCfg["use_gpu"] = true; + config.whisperContextCfg["flash_attn"] = false; + auto params = toWhisperContextParams(config); + EXPECT_TRUE(params.use_gpu); + EXPECT_FALSE(params.flash_attn); +} + +TEST(NeuralProcessor, LoadInvalidEmbedderReturnsFalse) { + NeuralProcessor processor; + EXPECT_FALSE(processor.loadEmbedderWeights("/nonexistent/path/embedder.bin")); + EXPECT_FALSE(processor.hasWeights()); +} + +TEST(NeuralProcessor, PassthroughModeSkipsPreprocessing) { + NeuralProcessor processor; + // Build a small "pre-computed mel" buffer and ensure passthrough + // reshapes it into 
mel-major layout without throwing or zero-padding + // the live frames. + const uint32_t T = 32; + const uint32_t C = 64; + auto signal = createTestSignal(T, C); + + auto result = processor.processToMel(signal, /*dayIdx=*/-1); + EXPECT_EQ(result.size(), + static_cast(NeuralProcessor::K_WHISPER_MEL_FRAMES) * + NeuralProcessor::K_WHISPER_N_MEL); + + // First frame, first bin should match the test signal's (t=0, c=0) value + // after the mel-major transpose: data[bin * n_frames + frame]. + const int nFrames = NeuralProcessor::K_WHISPER_MEL_FRAMES; + const float* originalData = reinterpret_cast( + signal.data() + 2 * sizeof(uint32_t)); + EXPECT_FLOAT_EQ(result[0 * nFrames + 0], originalData[0 * C + 0]); + EXPECT_FLOAT_EQ(result[1 * nFrames + 0], originalData[0 * C + 1]); +} diff --git a/packages/bci-whispercpp/addonLogging.d.ts b/packages/bci-whispercpp/addonLogging.d.ts new file mode 100644 index 0000000000..bd687d60bc --- /dev/null +++ b/packages/bci-whispercpp/addonLogging.d.ts @@ -0,0 +1,7 @@ +export interface AddonLogging { + setLogger(callback: (priority: number, message: string) => void): void + releaseLogger(): void +} + +declare const addonLogging: AddonLogging +export default addonLogging diff --git a/packages/bci-whispercpp/addonLogging.js b/packages/bci-whispercpp/addonLogging.js new file mode 100644 index 0000000000..479ecdf3da --- /dev/null +++ b/packages/bci-whispercpp/addonLogging.js @@ -0,0 +1,6 @@ +const binding = require('./binding') + +module.exports = { + setLogger: binding.setLogger, + releaseLogger: binding.releaseLogger +} diff --git a/packages/bci-whispercpp/bci.js b/packages/bci-whispercpp/bci.js new file mode 100644 index 0000000000..bbcdb46cf7 --- /dev/null +++ b/packages/bci-whispercpp/bci.js @@ -0,0 +1,370 @@ +'use strict' + +const { QvacErrorAddonBCI, ERR_CODES } = require('./lib/error') +const { checkConfig } = require('./configChecker') + +const state = Object.freeze({ + LOADING: 'loading', + LISTENING: 'listening', + PROCESSING: 
'processing', + IDLE: 'idle' +}) + +const END_OF_INPUT = 'end of job' + +// Upper bound on buffered neural-signal bytes between append() calls. +// Neural data is ~1 MB/s at 512ch * 50 Hz * 4 B, so 500 MB ~= 8 minutes of +// signal. The bound matches qvac-lib-infer-whispercpp and protects against +// runaway producers. +const MAX_BUFFERED_BYTES = 500 * 1024 * 1024 + +function nextSafeId (current) { + return current >= Number.MAX_SAFE_INTEGER ? 1 : current + 1 +} + +/** + * Low-level interface between the Bare C++ BCI addon and the JS runtime. + * Accepts neural signal data (Uint8Array) instead of audio. + */ +class BCIInterface { + /** + * @param {Object} binding - the native binding object + * @param {Object} configurationParams - configuration for the BCI model + * @param {Function} outputCb - callback for inference events (Output, JobEnded, Error) + * @param {Function} [transitionCb] - callback for state changes + */ + constructor (binding, configurationParams, outputCb, transitionCb = null) { + this._binding = binding + this._outputCb = outputCb + this._transitionCb = transitionCb + this._nextJobId = 1 + this._activeJobId = null + this._bufferedSignal = [] + this._bufferedBytes = 0 + this._state = state.LOADING + + checkConfig(configurationParams) + this._handle = this._binding.createInstance( + this, + configurationParams, + this._addonOutputCallback.bind(this), + transitionCb + ) + } + + _setState (newState) { + this._state = newState + if (this._transitionCb) { + this._transitionCb(this, newState) + } + } + + _addonOutputCallback (addon, event, data, error) { + const isError = typeof error === 'string' && error.length > 0 + const isStats = data && typeof data === 'object' && ( + 'totalTime' in data || + 'tokensPerSecond' in data || + 'totalWallMs' in data + ) + const isTranscriptOutput = ( + (Array.isArray(data) && data.length > 0) || + (data && typeof data === 'object' && typeof data.text === 'string') + ) + + let mappedEvent = event + if (event === 'Error' 
|| isError || String(event).includes('Error')) { + mappedEvent = 'Error' + } else if (event === 'JobEnded' || isStats || String(event).includes('RuntimeStats')) { + mappedEvent = 'JobEnded' + } else if (event === 'Output' || isTranscriptOutput) { + mappedEvent = 'Output' + } else if (Array.isArray(data) && data.length === 0) { + return + } + + const jobId = this._activeJobId + if (jobId === null || jobId === undefined) { + return + } + + if (mappedEvent === 'Output') { + this._setState(state.PROCESSING) + + if (this._outputCb != null) { + const isTranscriptArray = Array.isArray(data) && data.length > 0 && + typeof data[0]?.text === 'string' + const isSingleTranscript = !Array.isArray(data) && + data && typeof data === 'object' && typeof data.text === 'string' + if (isTranscriptArray) { + for (const segment of data) { + this._outputCb(addon, 'Output', jobId, [segment], null) + } + } else if (isSingleTranscript) { + this._outputCb(addon, 'Output', jobId, [data], null) + } else { + this._outputCb(addon, 'Output', jobId, data, null) + } + } + return + } + + if (this._outputCb != null) { + this._outputCb(addon, mappedEvent, jobId, data, isError ? 
error : null) + } + + if (mappedEvent === 'Error' || mappedEvent === 'JobEnded') { + this._activeJobId = null + this._setState(state.LISTENING) + } + } + + async unload () { + await this.destroyInstance() + } + + async load (configurationParams) { + checkConfig(configurationParams) + await this.destroyInstance() + this._handle = this._binding.createInstance( + this, + configurationParams, + this._addonOutputCallback.bind(this), + this._transitionCb + ) + this._setState(state.LOADING) + } + + async reload (configurationParams) { + checkConfig(configurationParams) + await this.cancel() + + if (typeof this._binding.reload === 'function') { + await this._binding.reload(this._handle, configurationParams) + this._setState(state.LOADING) + return + } + + await this.load(configurationParams) + } + + async loadWeights (weightsData) { + try { + this._binding.loadWeights(this._handle, weightsData) + } catch (err) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_LOAD_WEIGHTS, + adds: err.message, + cause: err + }) + } + } + + async unloadWeights () { + return true + } + + async activate () { + try { + this._binding.activate(this._handle) + this._setState(state.LISTENING) + } catch (err) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_ACTIVATE, + adds: err.message, + cause: err + }) + } + } + + async cancel (jobId) { + try { + await this._binding.cancel(this._handle, jobId) + this._bufferedSignal = [] + this._bufferedBytes = 0 + this._activeJobId = null + this._setState(state.LISTENING) + } catch (err) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_CANCEL, + adds: err.message, + cause: err + }) + } + } + + /** + * Appends neural signal data to the processing buffer. + * Send { type: 'end of job' } to trigger processing. 
+ * @param {Object} data + * @param {string} data.type - 'neural' or 'end of job' + * @param {Uint8Array} [data.input] - binary neural signal data + * @returns {number} job ID + */ + async append (data) { + try { + if (data?.type === END_OF_INPUT) { + if (this._bufferedSignal.length === 0) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.INVALID_NEURAL_INPUT, + adds: 'no neural signal data was appended before end-of-job' + }) + } + const currentJobId = this._nextJobId + const input = this._concatBufferedSignal() + const previousState = this._state + const previousJobId = this._activeJobId + + let accepted = false + try { + accepted = this._binding.runJob(this._handle, { + type: 'neural', + input + }) + } catch (err) { + this._activeJobId = previousJobId + this._setState(previousState) + throw err + } + if (!accepted) { + this._activeJobId = previousJobId + this._setState(previousState) + throw new QvacErrorAddonBCI({ code: ERR_CODES.JOB_ALREADY_RUNNING }) + } + + this._activeJobId = currentJobId + this._nextJobId = nextSafeId(this._nextJobId) + this._bufferedSignal = [] + this._bufferedBytes = 0 + this._setState(state.PROCESSING) + return currentJobId + } + + if (data?.type === 'neural') { + if (!(data.input instanceof Uint8Array)) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.INVALID_NEURAL_INPUT, + adds: 'input must be Uint8Array' + }) + } + if (this._bufferedBytes + data.input.byteLength > MAX_BUFFERED_BYTES) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.BUFFER_LIMIT_EXCEEDED, + adds: MAX_BUFFERED_BYTES + ' bytes' + }) + } + this._bufferedSignal.push(data.input) + this._bufferedBytes += data.input.byteLength + return this._nextJobId + } + + throw new Error(`Unknown append input type: ${data?.type}`) + } catch (err) { + if (err instanceof QvacErrorAddonBCI) throw err + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_APPEND, + adds: err.message, + cause: err + }) + } + } + + /** + * Run a single batch job directly with neural signal data. 
+ * @param {Object} data + * @param {Uint8Array} data.input - binary neural signal data + */ + async runJob (data) { + if (!data || !(data.input instanceof Uint8Array)) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.INVALID_NEURAL_INPUT, + adds: 'runJob input must be a Uint8Array' + }) + } + if (data.input.byteLength === 0) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.INVALID_NEURAL_INPUT, + adds: 'runJob input must not be empty' + }) + } + + const candidateJobId = this._nextJobId + const previousState = this._state + const previousJobId = this._activeJobId + let accepted = false + try { + accepted = this._binding.runJob(this._handle, { + type: 'neural', + input: data.input + }) + } catch (err) { + this._activeJobId = previousJobId + this._setState(previousState) + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_START_JOB, + adds: err.message, + cause: err + }) + } + + if (!accepted) { + this._activeJobId = previousJobId + this._setState(previousState) + return false + } + + this._activeJobId = candidateJobId + this._nextJobId = nextSafeId(this._nextJobId) + this._setState(state.PROCESSING) + return accepted + } + + async status () { + return this._state + } + + async destroyInstance () { + if (this._handle === null) { + return + } + try { + try { + await this._binding.cancel(this._handle) + } catch {} + this._binding.destroyInstance(this._handle) + this._handle = null + this._bufferedSignal = [] + this._bufferedBytes = 0 + this._activeJobId = null + this._setState(state.IDLE) + } catch (err) { + throw new QvacErrorAddonBCI({ + code: ERR_CODES.FAILED_TO_DESTROY, + adds: err.message, + cause: err + }) + } + } + + _concatBufferedSignal () { + if (this._bufferedSignal.length === 0) { + return new Uint8Array() + } + if (this._bufferedSignal.length === 1) { + return this._bufferedSignal[0] + } + const totalLength = this._bufferedSignal.reduce( + (sum, chunk) => sum + chunk.byteLength, 0 + ) + const merged = new Uint8Array(totalLength) + let offset 
/**
 * Validate a BCI addon configuration object.
 *
 * Requires `whisperConfig`, `contextParams` and `miscConfig` to be present as
 * objects, rejects any key that is not in the per-section allow-list, and
 * type-checks `miscConfig.caption_enabled` and `bciConfig.day_idx`.
 * `bciConfig` itself is optional.
 *
 * @param {Object} configObject
 * @returns {void}
 * @throws {Error} describing the first violation found
 */
function checkConfig (configObject) {
  // Fail with a readable message instead of "Cannot read properties of null".
  if (configObject === null || typeof configObject !== 'object') {
    throw new Error('configuration object is required')
  }

  const allowedParams = {
    whisperConfig: [
      'n_threads',
      'duration_ms',
      'translate',
      'no_timestamps',
      'single_segment',
      'print_special',
      'print_progress',
      'print_realtime',
      'print_timestamps',
      'language',
      'detect_language',
      'suppress_blank',
      'suppress_nst',
      'temperature',
      'greedy_best_of',
      'beam_search_beam_size'
    ],
    contextParams: [
      'model',
      'use_gpu',
      'flash_attn',
      'gpu_device'
    ],
    miscConfig: [
      'caption_enabled'
    ],
    bciConfig: [
      'day_idx'
    ]
  }

  const isSectionObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value)

  const assertOnlyKnownParams = (section) => {
    for (const userParam of Object.keys(configObject[section])) {
      if (!allowedParams[section].includes(userParam)) {
        throw new Error(`${userParam} is not a valid parameter for ${section}`)
      }
    }
  }

  // A truthy non-object (e.g. `true`) used to slip through because
  // Object.keys() of it is empty; require a real object for every section.
  for (const section of ['whisperConfig', 'contextParams', 'miscConfig']) {
    if (!isSectionObject(configObject[section])) {
      throw new Error(`${section} object is required`)
    }
  }

  assertOnlyKnownParams('whisperConfig')
  assertOnlyKnownParams('contextParams')
  assertOnlyKnownParams('miscConfig')

  if (configObject.miscConfig.caption_enabled !== undefined &&
      typeof configObject.miscConfig.caption_enabled !== 'boolean') {
    throw new Error('miscConfig.caption_enabled must be a boolean')
  }

  if (configObject.bciConfig) {
    if (!isSectionObject(configObject.bciConfig)) {
      throw new Error('bciConfig must be an object')
    }
    assertOnlyKnownParams('bciConfig')

    const dayIdx = configObject.bciConfig.day_idx
    if (dayIdx !== undefined) {
      // Number.isInteger already rejects non-numbers, NaN and +/-Infinity.
      if (!Number.isInteger(dayIdx)) {
        throw new Error('bciConfig.day_idx must be a finite integer')
      }
      if (dayIdx < -1) {
        throw new Error('bciConfig.day_idx must be >= -1 (use -1 to enable mel-passthrough mode)')
      }
    }
  }
}
/**
 * CLI entry point for the example.
 *
 * Single mode transcribes one neural signal file (args[0]); batch mode
 * (`--batch`) runs every sample listed in test/fixtures/manifest.json, grouped
 * by `day_idx`, and prints per-sample and average WER. In both modes args[1]
 * optionally overrides the model path.
 *
 * The model instance is always destroyed in a `finally`, including when
 * loading or reading the signal fails.
 *
 * @returns {Promise<void>}
 */
async function main () {
  const args = global.Bare ? global.Bare.argv.slice(2) : process.argv.slice(2)
  const isBatch = args[0] === '--batch'

  if (args.length < 1) {
    console.log('Usage:')
    console.log(' Single: bare examples/transcribe-neural.js <signal.bin> [model_path]')
    console.log(' Batch: bare examples/transcribe-neural.js --batch [model_path]')
    return
  }

  // Single-signal mode: args[0]=signal, args[1]=optional model
  // Batch mode: args[0]='--batch', args[1]=optional model
  const modelPath = args[1] || DEFAULT_MODEL
  if (!fs.existsSync(modelPath)) {
    console.error('Error: Model file not found: ' + modelPath)
    console.error('Set WHISPER_MODEL_PATH or pass as second argument.')
    return
  }

  if (isBatch) {
    const manifestPath = path.join(__dirname, '..', 'test', 'fixtures', 'manifest.json')
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'))

    console.log('=== BCI Neural Signal Transcription (Batch: ' + manifest.samples.length + ' samples) ===\n')

    const startTime = Date.now()

    // day_idx is fixed per model instance, so group samples by day and load
    // one instance per group. Samples without a numeric day_idx share key -1.
    const byDay = new Map()
    for (const sample of manifest.samples) {
      const key = typeof sample.day_idx === 'number' ? sample.day_idx : -1
      if (!byDay.has(key)) byDay.set(key, [])
      byDay.get(key).push(sample)
    }

    let total = 0
    let sumWER = 0

    for (const [day, samples] of byDay) {
      const bci = new BCIWhispercpp({
        files: { model: modelPath }
      }, {
        whisperConfig: { language: 'en', temperature: 0.0 },
        miscConfig: { caption_enabled: false },
        bciConfig: day >= 0 ? { day_idx: day } : undefined
      })

      try {
        await bci.load()

        for (const sample of samples) {
          const samplePath = path.join(__dirname, '..', 'test', 'fixtures', sample.file)
          if (!fs.existsSync(samplePath)) {
            console.log(' [SKIP] ' + sample.file + ' (not found)')
            continue
          }

          const response = await bci.transcribeFile(samplePath)
          const output = await response.await()
          const segments = flattenSegments(output)
          const text = segments.map(s => s.text).join('').trim()
          const wer = BCIWhispercpp.computeWER(text, sample.expected_text)

          console.log(' [' + sample.file + '] day=' + day)
          console.log(' Got: "' + text + '"')
          console.log(' Expected: "' + sample.expected_text + '"')
          console.log(' WER: ' + (wer * 100).toFixed(1) + '%\n')

          total += 1
          sumWER += wer
        }
      } finally {
        await bci.destroy()
      }
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
    const avgWER = total > 0 ? sumWER / total : 0
    console.log('Average WER: ' + (avgWER * 100).toFixed(1) + '% (n=' + total + ')')
    console.log('Time: ' + elapsed + 's')
  } else {
    const signalPath = args[0]
    if (!fs.existsSync(signalPath)) {
      console.error('Error: Signal file not found: ' + signalPath)
      return
    }

    // Read and validate the header before any model is loaded, so a bad file
    // cannot leave a loaded instance behind. The two uint32 reads below need
    // at least 8 bytes.
    const buf = fs.readFileSync(signalPath)
    if (buf.byteLength < 8) {
      console.error('Error: Signal file is too short to contain a header: ' + signalPath)
      return
    }
    const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
    const T = view.getUint32(0, true)
    const C = view.getUint32(4, true)

    const bci = new BCIWhispercpp({
      files: { model: modelPath }
    }, {
      whisperConfig: { language: 'en', temperature: 0.0 },
      miscConfig: { caption_enabled: false }
    })

    try {
      await bci.load()
      console.log('Model loaded.\n')

      console.log('=== BCI Neural Signal Transcription ===')
      console.log('Signal: ' + signalPath)
      console.log('Timesteps: ' + T + ', Channels: ' + C)
      console.log('Duration: ~' + (T * 20 / 1000).toFixed(1) + 's\n')

      const startTime = Date.now()
      const response = await bci.transcribeFile(signalPath)
      const output = await response.await()
      const segments = flattenSegments(output)
      const text = segments.map(s => s.text).join('').trim()
      const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)

      console.log('Text: "' + text + '"')
      console.log('Time: ' + elapsed + 's')
    } finally {
      await bci.destroy()
    }
  }

  console.log('\nDone.')
}
/**
 * BCI neural signal transcription client powered by whisper.cpp.
 *
 * Uses `createJobHandler` + `exclusiveRunQueue` from `@qvac/infer-base` and
 * follows the same lifecycle contract as `TranscriptionWhispercpp` /
 * `LlmLlamacpp`: construct with local file paths, call `load()`, issue
 * `transcribe()` / `transcribeFile()` calls, then `destroy()`.
 */
declare class BCIWhispercpp {
  constructor(args: BCIWhispercppArgs, config?: BCIWhispercppConfig)

  /** Load and activate the model. Must be awaited before `transcribe()`. */
  load(): Promise<void>

  /**
   * Transcribe a neural signal binary file (convenience wrapper).
   * Resolves to the response handle; call `response.await()` for the output.
   */
  transcribeFile(filePath: string): Promise<QvacResponse>

  /**
   * Transcribe a neural signal buffer (batch mode).
   * Resolves to the response handle; call `response.await()` for the output.
   */
  transcribe(neuralData: Uint8Array): Promise<QvacResponse>

  /** Cancel the in-flight inference, if any. */
  cancel(): Promise<void>

  /** Unload the model and release native resources. Instance is reusable. */
  unload(): Promise<void>

  /**
   * Destroy the instance, unload, and mark as permanently destroyed.
   * Subsequent `load()` calls will throw `MODEL_NOT_LOADED`.
   */
  destroy(): Promise<void>

  /** Current lifecycle state. */
  getState(): BCIWhispercppState
}
/**
 * BCI neural signal transcription client powered by whisper.cpp.
 *
 * Follows the same architecture as TranscriptionWhispercpp / LlmLlamacpp:
 * standalone class using createJobHandler + exclusiveRunQueue from
 * @qvac/infer-base. Lifecycle: construct with local file paths, load(),
 * transcribe() / transcribeFile(), then unload() (reusable) or destroy()
 * (permanent).
 */
class BCIWhispercpp {
  /**
   * @param {Object} args
   * @param {Object} args.files - local model file paths
   * @param {string} args.files.model - path to the BCI GGML model file
   * @param {Object} [args.logger] - optional logger instance
   * @param {Object} [args.opts] - optional options (e.g. { stats: true })
   * @param {Object} [config] - inference configuration
   * @param {Object} [config.whisperConfig] - whisper decoding params
   * @param {Object} [config.bciConfig] - BCI-specific params (e.g. { day_idx: 1 })
   * @param {Object} [config.contextParams] - whisper context params
   * @param {Object} [config.miscConfig] - miscellaneous config
   * @throws {QvacErrorAddonBCI} MODEL_FILE_NOT_FOUND when files.model is
   *   missing or empty
   */
  constructor ({ files, logger = null, opts = {} }, config = {}) {
    if (!files || typeof files.model !== 'string' || files.model.length === 0) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.MODEL_FILE_NOT_FOUND,
        adds: 'files.model is required'
      })
    }

    this._files = { model: files.model }
    // Default parameters only cover `undefined`; tolerate an explicit null.
    this._config = config || {}
    this.opts = opts || {}
    this.logger = new QvacLogger(logger)
    this._withExclusiveRun = exclusiveRunQueue()
    this._inferenceQueueWaiter = Promise.resolve()
    this._job = createJobHandler({
      cancel: () => this.addon?.cancel()
    })

    this.addon = null
    this.state = {
      configLoaded: false,
      destroyed: false
    }
  }

  /**
   * @returns {{ configLoaded: boolean, destroyed: boolean }} the live state
   *   object (not a copy)
   */
  getState () {
    return this.state
  }

  /**
   * Load and activate the model. When a model is already loaded it is
   * unloaded first.
   * @returns {Promise<void>}
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED when the instance was
   *   destroyed; otherwise whatever _load() throws
   */
  async load () {
    if (this.state.destroyed) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.MODEL_NOT_LOADED,
        adds: 'instance was destroyed'
      })
    }
    if (this.state.configLoaded) {
      this.logger.info('Reload requested - unloading existing model first')
      await this.unload()
    }
    await this._load()
    this.state.configLoaded = true
  }

  /**
   * Build the native configuration, create the addon and activate it.
   * @returns {Promise<void>}
   * @throws {QvacErrorAddonBCI} MODEL_FILE_NOT_FOUND, MODEL_NOT_LOADED,
   *   INVALID_CONFIG, FAILED_TO_LOAD_WEIGHTS or FAILED_TO_ACTIVATE
   */
  async _load () {
    if (!fs.existsSync(this._files.model)) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.MODEL_FILE_NOT_FOUND,
        adds: this._files.model
      })
    }

    const whisperConfig = {
      language: 'en',
      n_threads: 0,
      ...(this._config.whisperConfig || {})
    }

    const configurationParams = {
      contextParams: {
        model: this._files.model,
        ...(this._config.contextParams || {})
      },
      whisperConfig,
      miscConfig: {
        caption_enabled: false,
        ...(this._config.miscConfig || {})
      }
    }

    if (this._config.bciConfig) {
      configurationParams.bciConfig = this._config.bciConfig
    }

    // load() awaits unload() before reaching here, so destroy() may have run
    // in between; re-check before creating native resources.
    if (this.state.destroyed) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.MODEL_NOT_LOADED,
        adds: 'instance was destroyed'
      })
    }

    const binding = require('./binding')
    let addon
    try {
      addon = new BCIInterface(
        binding,
        configurationParams,
        this._outputCallback.bind(this),
        this.logger.info.bind(this.logger)
      )
    } catch (err) {
      this.addon = null
      const configError = this._isConfigurationError(err)
      throw new QvacErrorAddonBCI({
        code: configError ? ERR_CODES.INVALID_CONFIG : ERR_CODES.FAILED_TO_LOAD_WEIGHTS,
        adds: err.message,
        cause: err
      })
    }
    this.addon = addon

    try {
      await addon.activate()
    } catch (err) {
      this.addon = null
      // The interface object was created, so release it (best effort)
      // instead of just dropping the reference.
      try {
        await addon.destroyInstance()
      } catch (destroyErr) {
        this.logger.error('Failed to release BCI addon after activation failure: ' + (destroyErr?.message || destroyErr))
      }
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_ACTIVATE,
        adds: err.message,
        cause: err
      })
    }
    this.logger.info('BCI addon activated')
  }

  /**
   * Transcribe a neural signal from a binary file.
   * Convenience wrapper around transcribe().
   * @param {string} filePath - path to .bin neural signal file
   * @returns {Promise<QvacResponse>}
   */
  async transcribeFile (filePath) {
    const data = fs.readFileSync(filePath)
    return this.transcribe(new Uint8Array(data))
  }

  /**
   * Transcribe neural signal data (batch mode).
   * Returns a QvacResponse; use response.await() for the final output array,
   * response.onUpdate() for streaming updates, response.stats for runtime stats.
   * @param {Uint8Array} neuralData - binary neural signal
   * @returns {Promise<QvacResponse>}
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED when the model is not ready,
   *   JOB_ALREADY_RUNNING when the addon does not accept the job
   */
  async transcribe (neuralData) {
    this._assertReadyForInference()
    return await this._enqueueInference(async () => {
      // The model may have been unloaded or destroyed while this call waited
      // for its slot; fail with a typed error rather than a null dereference.
      this._assertReadyForInference()

      const response = this._job.start()

      let accepted
      try {
        accepted = await this.addon.runJob({ input: neuralData })
      } catch (err) {
        this._job.fail(err)
        throw err
      }
      if (!accepted) {
        const error = new QvacErrorAddonBCI({ code: ERR_CODES.JOB_ALREADY_RUNNING })
        this._job.fail(error)
        throw error
      }

      // Cache the settled promise and attach a no-op catch so a caller that
      // never awaits the response does not trigger an unhandled rejection.
      const finalized = response.await()
      finalized.catch(() => {})
      response.await = () => finalized
      return response
    })
  }

  /**
   * Serialize inference runs so a second transcribe() waits until the first
   * response settles. This queue is separate from _withExclusiveRun, which
   * serializes lifecycle operations; unload()/destroy() wait for the slot
   * that is current when they run before tearing the addon down.
   * @param {Function} runFn - async function that starts the job and resolves
   *   to its response
   * @returns {Promise<QvacResponse>}
   */
  async _enqueueInference (runFn) {
    const prev = this._inferenceQueueWaiter
    let releaseSlot
    this._inferenceQueueWaiter = new Promise(resolve => { releaseSlot = resolve })
    await prev
    let response
    try {
      response = await runFn()
    } catch (err) {
      releaseSlot()
      throw err
    }
    // Hold the slot until the response settles, successfully or not.
    response.await().finally(() => { releaseSlot() }).catch(() => {})
    return response
  }

  /**
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED unless the model is loaded
   *   and the instance has not been destroyed
   */
  _assertReadyForInference () {
    if (this.state.destroyed || !this.state.configLoaded || !this.addon) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.MODEL_NOT_LOADED,
        adds: this.state.destroyed ? 'instance was destroyed' : 'call load() before transcribe()'
      })
    }
  }

  /**
   * Classify an addon construction error as a configuration problem.
   * @param {*} err
   * @returns {boolean} true for assertion errors, TypeErrors and messages
   *   containing one of the known validation phrases
   */
  _isConfigurationError (err) {
    if (err && err.code === 'ERR_ASSERTION') return true
    if (err instanceof TypeError) return true
    const msg = String(err?.message || '')
    return msg.includes('is required') || msg.includes('is not a valid parameter') || msg.includes('must be')
  }

  /**
   * Route native addon events to the job handler.
   * @param {Object} addon
   * @param {string} event - 'Error', 'Output', 'JobEnded' or another event name
   * @param {number} jobId
   * @param {*} data - output payload, or the end-of-job payload
   * @param {*} error - error payload for 'Error' events
   */
  _outputCallback (addon, event, jobId, data, error) {
    if (event === 'Error') {
      this.logger.error('Job ' + jobId + ' failed with error: ' + error)
      this._job.fail(error)
      return
    }
    if (event === 'Output') {
      this._job.output(data)
      return
    }
    if (event === 'JobEnded') {
      this.logger.info('Job ' + jobId + ' completed')
      // The end-of-job payload is only forwarded when stats were requested.
      if (this.opts.stats) {
        this._job.end(data)
      } else {
        this._job.end()
      }
      return
    }
    this.logger.debug('Received event for job ' + jobId + ': ' + event)
  }

  /**
   * Cancel the in-flight inference, if any, and fail the active job.
   * @returns {Promise<void>}
   */
  async cancel () {
    if (this.addon?.cancel) {
      await this.addon.cancel()
    }
    if (this._job.active) {
      this._job.fail(new Error('Job cancelled'))
    }
  }

  /**
   * Shared teardown for unload() and destroy(): wait for the current
   * inference slot, release the native instance, fail any job still active
   * and mark the model as not loaded.
   * @param {string} failureMessage - message for the error given to a job
   *   that is still active
   * @returns {Promise<void>}
   */
  async _teardown (failureMessage) {
    await this._inferenceQueueWaiter
    const addon = this.addon
    if (addon) {
      // Detach before awaiting: a transcribe() that wakes up while the native
      // instance is being destroyed must fail its readiness re-check.
      this.addon = null
      try {
        await addon.destroyInstance()
      } catch (err) {
        // Keep the reference so the caller can retry the teardown.
        this.addon = addon
        throw err
      }
    }
    if (this._job.active) {
      this._job.fail(new Error(failureMessage))
    }
    this.state.configLoaded = false
  }

  /**
   * Unload the model and release native resources. The instance can be
   * loaded again afterwards.
   * @returns {Promise<void>}
   */
  async unload () {
    return await this._withExclusiveRun(async () => {
      await this._teardown('Model was unloaded')
    })
  }

  /**
   * Unload the model and mark the instance as permanently destroyed; later
   * load() calls throw MODEL_NOT_LOADED.
   * @returns {Promise<void>}
   */
  async destroy () {
    return await this._withExclusiveRun(async () => {
      await this._teardown('Model was destroyed')
      this.state.destroyed = true
    })
  }
}
/**
 * Flatten transcription output into one flat list of segments.
 *
 * Each entry of `output` is either an array of segments (all of its elements
 * are kept, in order) or a single segment object with a string `text`
 * property. Any other entry is skipped.
 *
 * @param {Iterable<*>|null|undefined} output - output as returned by
 *   `response.await()`; `null` / `undefined` yields an empty list
 * @returns {Array<Object>} flattened segments
 */
function flattenSegments (output) {
  const segments = []
  // A job that produced no output should read as "no segments", not crash
  // with "output is not iterable".
  if (output === null || output === undefined) {
    return segments
  }
  for (const entry of output) {
    if (Array.isArray(entry)) {
      // Element-wise push: push(...entry) passes every element as a call
      // argument and can exceed the engine's argument limit on huge arrays.
      for (const segment of entry) {
        segments.push(segment)
      }
    } else if (entry && typeof entry.text === 'string') {
      segments.push(entry)
    }
  }
  return segments
}
/**
 * Word Error Rate of a hypothesis against a reference transcript.
 *
 * Both strings are lower-cased and split on whitespace; the result is the
 * word-level Levenshtein distance divided by the number of reference words.
 * An empty reference gives 0 when the hypothesis is empty too, else 1.
 *
 * @param {string} hypothesis
 * @param {string} reference
 * @returns {number} WER as a ratio (0.0 = perfect, 1.0 = 100% errors)
 */
function computeWER (hypothesis, reference) {
  const toWords = (text) => text.toLowerCase().trim().split(/\s+/).filter(Boolean)
  const hypWords = toWords(hypothesis)
  const refWords = toWords(reference)

  if (refWords.length === 0) {
    return hypWords.length === 0 ? 0 : 1
  }

  // Only the previous row of the edit-distance table is needed to build the
  // next one. Row 0 is the cost of inserting the first j hypothesis words.
  let previousRow = Array.from({ length: hypWords.length + 1 }, (_, j) => j)

  refWords.forEach((refWord, refIndex) => {
    // Column 0: cost of deleting the first refIndex + 1 reference words.
    const currentRow = [refIndex + 1]
    hypWords.forEach((hypWord, hypIndex) => {
      const cost = refWord === hypWord
        ? previousRow[hypIndex]
        : 1 + Math.min(
          previousRow[hypIndex + 1],
          currentRow[hypIndex],
          previousRow[hypIndex]
        )
      currentRow.push(cost)
    })
    previousRow = currentRow
  })

  return previousRow[hypWords.length] / refWords.length
}
addon/src/model-interface/bci/BCIConfig.cpp addon/src/model-interface/bci/BCIModel.cpp addon/src/model-interface/bci/NeuralProcessor.cpp addon/src/addon/AddonJs.hpp addon/src/js-interface/JSAdapter.hpp addon/src/addon/BCIErrors.hpp addon/src/model-interface/BCITypes.hpp addon/src/model-interface/bci/BCIConfig.hpp addon/src/model-interface/bci/BCIModel.hpp addon/src/model-interface/bci/NeuralProcessor.hpp", + "build": "bare-make generate && bare-make build && bare-make install", + "build:pack": "mkdir -p dist && npm pack --pack-destination dist", + "test": "npm run test:integration", + "test:integration": "brittle-bare test/integration/addon.test.js", + "test:cpp:build": "bare-make generate -D BUILD_TESTING=ON && bare-make build --target test-bci-core && bare-make install", + "test:cpp:run": "./build/addon/tests/test-bci-core --gtest_output=xml:cpp-test-results.xml", + "test:cpp": "npm run test:cpp:build && npm run test:cpp:run", + "test:dts": "tsc -p tsconfig.dts.json" + }, + "files": [ + "addonLogging.js", + "addonLogging.d.ts", + "binding.js", + "bci.js", + "configChecker.js", + "index.js", + "index.d.ts", + "prebuilds", + "lib", + "README.md", + "CHANGELOG.md", + "LICENSE", + "NOTICE" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/tetherto/qvac.git", + "directory": "packages/bci-whispercpp" + }, + "author": "Tether", + "keywords": [ + "tether", + "addon", + "whisper", + "bci", + "brain-computer-interface", + "neural", + "qvac" + ], + "license": "Apache-2.0", + "bugs": "https://github.com/tetherto/qvac/issues", + "homepage": "https://github.com/tetherto/qvac/tree/main/packages/bci-whispercpp#readme", + "devDependencies": { + "@types/node": "^22.15.3", + "bare-buffer": "^3.4.2", + "brittle": "^3.17.0", + "cmake-bare": "^1.7.5", + "cmake-vcpkg": "^1.1.0", + "fs": "npm:bare-fs", + "os": "npm:bare-os@^3.6.2", + "standard": "^17.1.2", + "tty": "npm:bare-node-tty", + "typescript": "^5.9.2" + }, + "dependencies": { + "@qvac/error": "^0.1.0", + 
"@qvac/infer-base": "^0.4.0", + "@qvac/logging": "^0.1.0", + "bare-fs": "^4.5.1", + "bare-path": "^3.0.0", + "path": "npm:bare-path" + }, + "exports": { + "./package": "./package.json", + ".": { + "types": "./index.d.ts", + "default": "./index.js" + }, + "./addonLogging": { + "types": "./addonLogging.d.ts", + "default": "./addonLogging.js" + }, + "./addonLogging.js": "./addonLogging.js", + "./bci": "./bci.js", + "./bci.js": "./bci.js", + "./binding": "./binding.js", + "./binding.js": "./binding.js" + }, + "types": "index.d.ts" +} diff --git a/packages/bci-whispercpp/scripts/convert-model.py b/packages/bci-whispercpp/scripts/convert-model.py new file mode 100644 index 0000000000..0077aababc --- /dev/null +++ b/packages/bci-whispercpp/scripts/convert-model.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +""" +Convert BrainWhisperer checkpoint to GGML model + embedder weights for whisper.cpp. + +Produces two files required for BCI inference: + 1. GGML model (--output): whisper encoder/decoder weights, tokenizer, positional + embedding, windowed attention params in header + 2. Embedder file (--embedder-output): day projection weights (low-rank AยทB per day), + month projections, session-to-day mapping + +Both files must be in the same directory at runtime. The C++ addon loads the embedder +from the same directory as the GGML model (looks for "bci-embedder.bin"). 
def merge_lora_weights(state_dict, alpha=16, r=8):
    """Fold LoRA adapter weights into their base layers.

    Keys are sorted into three groups:
      * ``<base>.lora_A.default.weight`` / ``<base>.lora_B.default.weight``
        are collected as the A/B pair for ``<base>``;
      * keys containing ``.base_layer.`` are cloned and stored with that
        path component removed;
      * every other key is passed through unchanged.

    For each complete pair, ``(B @ A) * (alpha / r)`` is added to
    ``<base>.weight``. A pair that is incomplete, or whose base weight is not
    present, cannot be merged; it is skipped with a warning rather than
    silently, because the converted model would otherwise lose that adapter
    without any indication.

    Args:
        state_dict: mapping of parameter name to tensor.
        alpha: LoRA alpha used for the scaling factor.
        r: LoRA rank used for the scaling factor.

    Returns:
        A new dict with adapters merged. Input tensors are not modified in
        place.
    """
    scaling = alpha / r
    merged = {}
    lora_pairs = {}

    for key, tensor in state_dict.items():
        if ".lora_A.default.weight" in key:
            base_key = key.replace(".lora_A.default.weight", "")
            lora_pairs.setdefault(base_key, {})["A"] = tensor
        elif ".lora_B.default.weight" in key:
            base_key = key.replace(".lora_B.default.weight", "")
            lora_pairs.setdefault(base_key, {})["B"] = tensor
        elif ".base_layer." in key:
            clean_key = key.replace(".base_layer.", ".")
            merged[clean_key] = tensor.clone()
        else:
            merged[key] = tensor

    for base_key, pair in lora_pairs.items():
        if "A" not in pair or "B" not in pair:
            print(f"  WARNING: incomplete LoRA pair for {base_key}, adapter not merged")
            continue
        weight_key = base_key + ".weight"
        if weight_key not in merged:
            print(f"  WARNING: no base weight {weight_key} for LoRA pair, adapter not merged")
            continue
        delta = (pair["B"] @ pair["A"]) * scaling
        # Out-of-place add: the tensor held by the caller stays untouched.
        merged[weight_key] = merged[weight_key] + delta

    return merged
def bytes_to_unicode():
    """Build the byte -> printable-character table used by the tokenizer.

    Bytes in the ranges 0x21-0x7E, 0xA1-0xAC and 0xAE-0xFF map to the
    character with the same code point. Every other byte is mapped, in
    ascending order, to the code points 256, 257, ... so that all 256 bytes
    get a distinct character.

    The range bounds are written as numeric code points rather than as
    character literals so that the table does not depend on how this source
    file is encoded or transcoded.

    Returns:
        dict mapping each int in 0..255 to a one-character str.
    """
    bs = (
        list(range(0x21, 0x7E + 1))      # '!' .. '~'
        + list(range(0xA1, 0xAC + 1))    # U+00A1 .. U+00AC
        + list(range(0xAE, 0xFF + 1))    # U+00AE .. U+00FF
    )
    cs = bs[:]
    kept = set(bs)
    n = 0
    for b in range(2**8):
        if b not in kept:
            bs.append(b)
            cs.append(2**8 + n)
            n += 1
    cs = [chr(c) for c in cs]
    return dict(zip(bs, cs))
# GGML tensor name mapping (HuggingFace -> whisper.cpp)
CONV_MAP = {
    'self_attn.k_proj': 'attn.key',
    'self_attn.q_proj': 'attn.query',
    'self_attn.v_proj': 'attn.value',
    'self_attn.out_proj': 'attn.out',
    'self_attn_layer_norm': 'attn_ln',
    'encoder_attn.k_proj': 'cross_attn.key',
    'encoder_attn.q_proj': 'cross_attn.query',
    'encoder_attn.v_proj': 'cross_attn.value',
    'encoder_attn.out_proj': 'cross_attn.out',
    'encoder_attn_layer_norm': 'cross_attn_ln',
    'fc1': 'mlp.0',
    'fc2': 'mlp.2',
    'final_layer_norm': 'mlp_ln',
}


def rename_key(hf_key):
    """Convert a HuggingFace Whisper key to its whisper.cpp GGML name.

    ``<section>.layers.<i>.<inner>.<leaf>`` becomes
    ``<section>.blocks.<i>.<mapped>.<leaf>`` where ``<inner>`` is translated
    through ``CONV_MAP`` (unknown names pass through). Top-level layer-norm and
    embedding keys are renamed via a small fixed table; anything else is
    returned as ``<section>.<rest>``. Keys without a ``.`` are returned as-is.
    """
    parts = hf_key.split(".")
    if len(parts) < 2:
        return hf_key

    section, rest = parts[0], parts[1:]  # section is "encoder" or "decoder"

    if rest[0] == "layers":
        layer_idx = rest[1]
        inner = ".".join(rest[2:-1])
        mapped = CONV_MAP.get(inner, inner)
        return f"{section}.blocks.{layer_idx}.{mapped}.{rest[-1]}"

    # The encoder's final norm is "ln_post" in whisper.cpp, the decoder's is "ln".
    ln_name = "ln_post" if section == "encoder" else "ln"
    simple_map = {
        "layer_norm.bias": f"{section}.{ln_name}.bias",
        "layer_norm.weight": f"{section}.{ln_name}.weight",
        "embed_positions.weight": f"{section}.positional_embedding",
        "embed_tokens.weight": f"{section}.token_embedding.weight",
    }
    rest_str = ".".join(rest)
    return simple_map.get(rest_str, f"{section}.{rest_str}")


def export_embedder(state_dict, output_path):
    """Export day projection / embedder weights to a binary file.

    Layout (all counts are native-endian uint32, all weights float32):
      header   magic, version, num_features, embed_dim, kernel_size1,
               kernel_size2, stride2, num_days, num_months, rank
      convs    conv1.weight, conv1.bias, conv2.weight, conv2.bias,
               each as <count><data>
      s2d      <count><int32 data> of the session->day index map (count 0 if
               the checkpoint has none)
      days     for each day: day_A, day_B, day_bias as <count><data>
      months   for each month: month_weight, month_bias as <count><data>

    Args:
        state_dict: Checkpoint state dict containing ``model.embedders.0.*``.
        output_path: Destination file; parent directories are created.

    Raises:
        KeyError: if a required conv tensor is missing.
        ValueError: if the per-day or per-month tensor groups do not cover the
            same indices (checked before anything is written, so no truncated
            file is left behind).
    """
    prefix = 'model.embedders.0.'

    conv1_weight = state_dict[prefix + 'conv1.weight']
    conv2_weight = state_dict[prefix + 'conv2.weight']
    conv_arrays = [
        conv1_weight.numpy().flatten(),
        state_dict[prefix + 'conv1.bias'].numpy().flatten(),
        conv2_weight.numpy().flatten(),
        state_dict[prefix + 'conv2.bias'].numpy().flatten(),
    ]

    embed_dim = int(conv1_weight.shape[0])
    num_features = int(conv1_weight.shape[1])
    kernel_size1 = int(conv1_weight.shape[2])
    kernel_size2 = int(conv2_weight.shape[2])

    def key_index(key):
        return int(key.split('.')[-1])

    def indexed_keys(group):
        group_prefix = f'{prefix}{group}.'
        return sorted((k for k in state_dict if k.startswith(group_prefix)),
                      key=key_index)

    day_a_keys = indexed_keys('day_As')
    day_b_keys = indexed_keys('day_Bs')
    day_bias_keys = indexed_keys('day_biases')
    month_w_keys = indexed_keys('month_weights')
    month_b_keys = indexed_keys('month_biases')

    # Tensors are paired by position below, so every group must cover exactly
    # the same indices; otherwise the wrong tensors would be written together.
    def require_same_indices(label, reference, *others):
        ref_idx = [key_index(k) for k in reference]
        for other in others:
            if [key_index(k) for k in other] != ref_idx:
                raise ValueError(
                    f"Embedder {label} tensors are inconsistent: expected indices "
                    f"{ref_idx}, found {[key_index(k) for k in other]}")

    require_same_indices('day', day_a_keys, day_b_keys, day_bias_keys)
    require_same_indices('month', month_w_keys, month_b_keys)

    num_days = len(day_a_keys)
    num_months = len(month_w_keys)
    r = int(state_dict[day_a_keys[0]].shape[1]) if day_a_keys else 0

    s2d = state_dict.get(prefix + 'sessions_to_days.session_to_idx_map')

    EMBEDDER_MAGIC = 0x42434945
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    def write_array(f, arr):
        f.write(struct.pack('I', len(arr)))
        f.write(arr.tobytes())

    header = (
        EMBEDDER_MAGIC,
        1,  # version
        num_features,
        embed_dim,
        kernel_size1,
        kernel_size2,
        2,  # stride2
        num_days,
        num_months,
        r,
    )

    with open(output_path, "wb") as f:
        for value in header:
            f.write(struct.pack('I', value))

        for arr in conv_arrays:
            write_array(f, arr.astype(np.float32))

        if s2d is not None:
            write_array(f, s2d.numpy().astype(np.int32).flatten())
        else:
            f.write(struct.pack('I', 0))

        for i in range(num_days):
            for keys in (day_a_keys, day_b_keys, day_bias_keys):
                write_array(f, state_dict[keys[i]].numpy().flatten().astype(np.float32))

        for i in range(num_months):
            for keys in (month_w_keys, month_b_keys):
                write_array(f, state_dict[keys[i]].numpy().flatten().astype(np.float32))

    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    print(f"  Embedder: {output_path} ({size_mb:.1f} MB)")
    print(f"    {num_days} days, {num_months} months, rank={r}, "
          f"features={num_features}")
def main():
    """CLI entry point: convert a BrainWhisperer checkpoint for whisper.cpp.

    Loads the checkpoint, merges LoRA adapters, assembles the encoder/decoder
    tensors under whisper.cpp names, writes the GGML model file (header with
    the BCI extension fields, mel filter placeholder, tokenizer, tensors) to
    ``--output``, and finally writes the embedder weights to
    ``--embedder-output``.
    """
    parser = argparse.ArgumentParser(
        description="Convert BrainWhisperer checkpoint to GGML model + embedder")
    parser.add_argument("--checkpoint", required=True,
                        help="Path to BrainWhisperer .ckpt file")
    parser.add_argument("--output", default="models/ggml-bci-windowed.bin",
                        help="Output path for GGML model (default: models/ggml-bci-windowed.bin)")
    parser.add_argument("--embedder-output", default="models/bci-embedder.bin",
                        help="Output path for embedder weights (default: models/bci-embedder.bin)")
    parser.add_argument("--f32", action="store_true",
                        help="Use f32 for all tensors (avoids f16 precision loss)")
    parser.add_argument("--day-idx", type=int, default=1,
                        help="Day index for baked positional embedding (default: 1)")
    parser.add_argument("--window-size", type=int, default=57,
                        help="Windowed attention size, 0 to disable (default: 57)")
    parser.add_argument("--last-window-layer", type=int, default=3,
                        help="Last encoder layer with windowed attention (default: 3)")
    args = parser.parse_args()

    os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)

    # Model hyperparameters written to the GGML header.
    d_model = 384
    n_audio_head = 6
    n_audio_layer = 6
    n_text_head = 6
    n_text_layer = 4
    n_mels = 512  # neural signal channels (conv1 k=7 in patched whisper.cpp)
    n_conv1_kernel = 7
    n_vocab = 51864
    n_audio_ctx = 1500
    n_text_ctx = 448

    # Load checkpoint.
    # SECURITY: weights_only=False unpickles arbitrary Python objects (needed
    # here to read ckpt["hyper_parameters"]). Only run this on checkpoints
    # from a trusted source.
    print(f"Loading checkpoint: {args.checkpoint}")
    ckpt = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    state_dict = ckpt["state_dict"]
    config = ckpt["hyper_parameters"]["config"]

    # Merge LoRA
    print("Merging LoRA weights...")
    merged = merge_lora_weights(state_dict, alpha=16, r=8)

    # Build the model state dict for GGML
    # We need: encoder (conv1/conv2 from embedder, transformer layers, layer_norm)
    #          decoder (LoRA-merged layers, embed_tokens, embed_positions, layer_norm)
    model_sd = {}

    # --- Encoder conv1/conv2 come from the EMBEDDER (patched whisper.cpp
    #     supports the k=7 conv1) ---
    model_sd["encoder.conv1.weight"] = merged["model.embedders.0.conv1.weight"]
    model_sd["encoder.conv1.bias"] = merged["model.embedders.0.conv1.bias"]
    model_sd["encoder.conv2.weight"] = merged["model.embedders.0.conv2.weight"]
    model_sd["encoder.conv2.bias"] = merged["model.embedders.0.conv2.bias"]

    # --- Encoder positional embedding (combined time + day encoding) ---
    # Extract sessions list from checkpoint config for day number resolution
    sessions = config.get("dataset", {}).get("sessions", None)
    if sessions is None:
        sessions = config.get("sessions", None)
    print("Building combined positional embedding...")
    model_sd["encoder.positional_embedding"] = torch.from_numpy(
        build_positional_embedding(merged, d_model=d_model, day_idx=args.day_idx,
                                   sessions=sessions))

    # --- Encoder transformer layers ---
    enc_prefix = "model.whisper.model.encoder."
    for layer_idx in range(n_audio_layer):
        prefix_src = f"{enc_prefix}layers.{layer_idx}."
        for key, tensor in merged.items():
            if key.startswith(prefix_src):
                suffix = key[len(enc_prefix):]
                model_sd[rename_key(f"encoder.{suffix}")] = tensor

    # --- Encoder layer norm ---
    model_sd["encoder.ln_post.weight"] = merged["model.whisper.model.encoder.layer_norm.weight"]
    model_sd["encoder.ln_post.bias"] = merged["model.whisper.model.encoder.layer_norm.bias"]

    # --- Decoder (LoRA-merged) ---
    dec_prefix = "model.whisper.model.decoder."
    for key, tensor in merged.items():
        if not key.startswith(dec_prefix):
            continue
        # Remove PEFT wrapper
        clean = key[len("model.whisper.model."):]
        clean = clean.replace("decoder.base_model.model.", "decoder.")
        model_sd[rename_key(clean)] = tensor

    # proj_out is intentionally not exported: whisper.cpp skips it and uses
    # decoder.token_embedding transposed.

    print(f"\nGGML model: n_mels={n_mels}, encoder_layers={n_audio_layer}, "
          f"decoder_layers={n_text_layer}, d_model={d_model}")
    print(f"Tensors to write: {len(model_sd)}")

    # Mel filters: must have n_mel rows matching the header n_mels value,
    # because whisper_set_mel_with_state validates n_mel == filters.n_mel.
    mel_filters = np.zeros((n_mels, 201), dtype=np.float32)

    # Load tokenizer
    from transformers import WhisperTokenizer
    tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-tiny.en")
    tokens_dict = tokenizer.get_vocab()
    tokens_sorted = sorted(tokens_dict.items(), key=lambda x: x[1])

    byte_decoder = {v: k for k, v in bytes_to_unicode().items()}

    # These tensors stay f32 even in f16 mode (conv biases are reshaped to
    # [n, 1] below, so the n_dims < 2 rule alone would not catch them).
    f32_only = {
        "encoder.conv1.bias",
        "encoder.conv2.bias",
        "encoder.positional_embedding",
        "decoder.positional_embedding",
    }

    # Write GGML file
    print(f"\nWriting GGML model to: {args.output}")
    with open(args.output, "wb") as fout:
        # Magic
        fout.write(struct.pack("i", 0x67676d6c))

        # Header (matches whisper.cpp expected order)
        fout.write(struct.pack("i", n_vocab))
        fout.write(struct.pack("i", n_audio_ctx))
        fout.write(struct.pack("i", d_model))
        fout.write(struct.pack("i", n_audio_head))
        fout.write(struct.pack("i", n_audio_layer))
        fout.write(struct.pack("i", n_text_ctx))
        fout.write(struct.pack("i", d_model))
        fout.write(struct.pack("i", n_text_head))
        fout.write(struct.pack("i", n_text_layer))
        fout.write(struct.pack("i", n_mels))
        ftype_global = 0 if args.f32 else 1
        fout.write(struct.pack("i", ftype_global))  # ftype: 0=f32, 1=f16
        fout.write(struct.pack("i", n_conv1_kernel))  # BCI extension
        fout.write(struct.pack("i", args.window_size))  # BCI windowed attention
        fout.write(struct.pack("i", args.last_window_layer))

        # Mel filters (n_mels x 201, must match n_mels for whisper_set_mel validation).
        # Written in one call; the bytes are the same row-major native float32
        # values a per-element struct.pack("f", ...) loop would produce.
        fout.write(struct.pack("i", mel_filters.shape[0]))
        fout.write(struct.pack("i", mel_filters.shape[1]))
        fout.write(mel_filters.tobytes())

        # Tokenizer
        fout.write(struct.pack("i", len(tokens_sorted)))
        for token_str, token_id in tokens_sorted:
            try:
                text = bytearray([byte_decoder[c] for c in token_str])
            except KeyError:
                # Token contains characters outside the byte table: store UTF-8.
                text = token_str.encode("utf-8")
            fout.write(struct.pack("i", len(text)))
            fout.write(text)

        # Write tensors
        for name, tensor in model_sd.items():
            data = tensor.squeeze().numpy()

            # Reshape conv bias from [n] to [n, 1]
            if name in ["encoder.conv1.bias", "encoder.conv2.bias"]:
                data = data.reshape(data.shape[0], 1)

            n_dims = len(data.shape)

            keep_f32 = args.f32 or n_dims < 2 or name in f32_only
            ftype = 0 if keep_f32 else 1
            data = data.astype(np.float32 if keep_f32 else np.float16)

            # Tensor header: n_dims, name_len, ftype
            name_bytes = name.encode("utf-8")
            fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype))

            # Dims (reversed from numpy, as GGML expects)
            for i in range(n_dims):
                fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))

            fout.write(name_bytes)
            data.tofile(fout)

            print(f"  {name}: {data.shape} ({'f16' if ftype == 1 else 'f32'})")

    size_mb = os.path.getsize(args.output) / (1024 * 1024)
    print(f"  GGML model: {args.output} ({size_mb:.1f} MB)")

    # --- Export embedder weights ---
    print(f"\nWriting embedder weights to: {args.embedder_output}")
    export_embedder(state_dict, args.embedder_output)

    print("\nDone. Both files are required for inference:")
    print(f"  {args.output}")
    print(f"  {args.embedder_output}")


if __name__ == "__main__":
    main()
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PACKAGE_DIR="$(dirname "$SCRIPT_DIR")"
MODELS_DIR="${PACKAGE_DIR}/models"
FIXTURES_DIR="${PACKAGE_DIR}/test/fixtures"
RELEASE_TAG="bci-test-assets-v0.1.0"
RELEASE_REPO="tetherto/qvac"

# Download the GGML model and the embedder weights into $MODELS_DIR,
# overwriting any existing copies.
download_models() {
  mkdir -p "$MODELS_DIR"

  echo "Downloading BCI model files..."
  local asset
  for asset in "ggml-bci-windowed.bin" "bci-embedder.bin"; do
    gh release download "$RELEASE_TAG" \
      --repo "$RELEASE_REPO" \
      --pattern "$asset" --dir "$MODELS_DIR" \
      --clobber
  done

  echo "Model files:" && ls -lh "$MODELS_DIR"/*.bin
}

# Download the fixture archive into a temporary directory and unpack it into
# $FIXTURES_DIR.
#
# The body runs in a subshell (note the parentheses) so the cleanup can hang
# off an EXIT trap: a RETURN trap is skipped when `set -e` aborts the script
# after a failed download, which would leak the temporary directory, and it
# would also stay installed for later function returns.
download_fixtures() (
  mkdir -p "$FIXTURES_DIR"
  temp_dir="$(mktemp -d "${TMPDIR:-/tmp}/bci-test-fixtures.XXXXXX")"
  archive_path="${temp_dir}/bci-test-fixtures.tar.gz"
  trap 'rm -rf "$temp_dir"' EXIT

  echo "Downloading BCI test fixtures..."
  gh release download "$RELEASE_TAG" \
    --repo "$RELEASE_REPO" \
    --pattern "bci-test-fixtures.tar.gz" --dir "$temp_dir" \
    --clobber

  tar xzf "$archive_path" -C "$FIXTURES_DIR/"

  echo "Test fixtures:" && ls -lh "$FIXTURES_DIR"/*.bin
)

case "${1:-all}" in
  --models)   download_models ;;
  --fixtures) download_fixtures ;;
  all)        download_models; echo; download_fixtures ;;
  *)
    # Usage errors go to stderr so they are not mistaken for normal output.
    echo "Unknown option: ${1}" >&2
    echo "Usage: bash scripts/download-models.sh [all|--models|--fixtures]" >&2
    exit 1
    ;;
esac

echo ""
echo "Done. Run tests with: npm run test:integration"
Run tests with: npm run test:integration" diff --git a/packages/bci-whispercpp/test/fixtures/manifest.json b/packages/bci-whispercpp/test/fixtures/manifest.json new file mode 100644 index 0000000000..10da2de6c0 --- /dev/null +++ b/packages/bci-whispercpp/test/fixtures/manifest.json @@ -0,0 +1,39 @@ +{ + "samples": [ + { + "file": "neural_sample_0.bin", + "timesteps": 910, + "channels": 512, + "expected_text": "You can see the code at this point as well.", + "day_idx": 1 + }, + { + "file": "neural_sample_1.bin", + "timesteps": 749, + "channels": 512, + "expected_text": "How does it keep the cost down?", + "day_idx": 1 + }, + { + "file": "neural_sample_2.bin", + "timesteps": 502, + "channels": 512, + "expected_text": "Not too controversial.", + "day_idx": 1 + }, + { + "file": "neural_sample_3.bin", + "timesteps": 962, + "channels": 512, + "expected_text": "The jury and a judge work together on it.", + "day_idx": 1 + }, + { + "file": "neural_sample_4.bin", + "timesteps": 584, + "channels": 512, + "expected_text": "Were quite vocal about it.", + "day_idx": 1 + } + ] +} diff --git a/packages/bci-whispercpp/test/integration/addon.test.js b/packages/bci-whispercpp/test/integration/addon.test.js new file mode 100644 index 0000000000..cac8d4811b --- /dev/null +++ b/packages/bci-whispercpp/test/integration/addon.test.js @@ -0,0 +1,142 @@ +'use strict' + +const fs = require('bare-fs') +const path = require('bare-path') +const test = require('brittle') +const os = require('bare-os') +const BCIWhispercpp = require('../../index') +const { getTestPaths, computeWER, detectPlatform } = require('./helpers') +const { flattenSegments } = require('../../lib/util') + +const platform = detectPlatform() +const { manifest, getSamplePath } = getTestPaths() + +const MODEL_PATH = (os.hasEnv('WHISPER_MODEL_PATH') ? 
/**
 * Build the optional `bciConfig` for a manifest sample.
 *
 * @param {{ day_idx?: number }} [sample] manifest entry
 * @returns {{ day_idx: number } | undefined} config carrying the sample's
 *   day index, or undefined when the sample has no numeric `day_idx`
 */
function bciConfigFor (sample) {
  return typeof sample?.day_idx === 'number' ? { day_idx: sample.day_idx } : undefined
}

// Smoke test: the package-level class can load the model and be torn down.
test('[BCI] load and destroy via package interface', { skip: !hasModel, timeout: 120000 }, async (t) => {
  const bci = new BCIWhispercpp({
    files: { model: MODEL_PATH }
  }, {
    whisperConfig: { language: 'en', temperature: 0.0 },
    miscConfig: { caption_enabled: false }
  })

  // Same pattern as the other tests: always destroy, even if load() throws,
  // so a failed load does not leave the native instance alive.
  try {
    await bci.load()
    t.ok(bci, 'BCIWhispercpp should be created and loaded')
  } finally {
    await bci.destroy()
  }
  t.pass('BCIWhispercpp destroyed successfully')
})

// Transcribes the first manifest sample and checks that some text and at
// least one segment come back; the WER is reported but not thresholded here.
test('[BCI] batch transcription from neural signal file', { skip: !hasModel, timeout: 120000 }, async (t) => {
  t.ok(manifest.samples.length > 0, 'Manifest must contain at least one sample')

  const sample = manifest.samples[0]
  const samplePath = getSamplePath(sample.file)
  t.ok(fs.existsSync(samplePath), 'Fixture ' + sample.file + ' must exist')

  const bci = new BCIWhispercpp({
    files: { model: MODEL_PATH }
  }, {
    whisperConfig: { language: 'en', temperature: 0.0 },
    miscConfig: { caption_enabled: false },
    bciConfig: bciConfigFor(sample)
  })

  try {
    await bci.load()

    const response = await bci.transcribeFile(samplePath)
    const output = await response.await()
    const segments = flattenSegments(output)
    const text = segments.map(s => s.text).join('').trim()

    t.comment('Expected: "' + sample.expected_text + '"')
    t.comment('Got:      "' + text + '"')

    const wer = computeWER(text, sample.expected_text)
    t.comment('WER: ' + (wer * 100).toFixed(1) + '%')

    t.ok(typeof text === 'string' && text.length > 0, 'Should produce a transcription string')
    t.ok(segments.length > 0, 'Should have segments')
    t.ok(typeof wer === 'number' && wer >= 0, 'WER should be a non-negative number')
  } finally {
    await bci.destroy()
  }
})

// Runs every manifest sample and asserts the average WER stays below 50%.
test('[BCI] WER measurement across all test samples', { skip: !hasModel, timeout: 180000 }, async (t) => {
  t.ok(manifest.samples.length > 0, 'Manifest must contain at least one sample')

  t.comment('Platform: ' + platform.label)
  t.comment('Model: ' + MODEL_PATH)

  const results = []

  // Group samples by day index so one model instance is created per day;
  // samples without a numeric day_idx share the -1 bucket (no bciConfig).
  const byDay = new Map()
  for (const sample of manifest.samples) {
    const key = typeof sample.day_idx === 'number' ? sample.day_idx : -1
    if (!byDay.has(key)) byDay.set(key, [])
    byDay.get(key).push(sample)
  }

  for (const [day, samples] of byDay) {
    const bci = new BCIWhispercpp({
      files: { model: MODEL_PATH }
    }, {
      whisperConfig: { language: 'en', temperature: 0.0 },
      miscConfig: { caption_enabled: false },
      bciConfig: day >= 0 ? { day_idx: day } : undefined
    })

    try {
      await bci.load()

      for (const sample of samples) {
        const samplePath = getSamplePath(sample.file)
        if (!fs.existsSync(samplePath)) {
          // Recorded as a failure; the count check below will also trip.
          t.fail('Fixture ' + sample.file + ' is missing')
          continue
        }

        const response = await bci.transcribeFile(samplePath)
        const output = await response.await()
        const segments = flattenSegments(output)
        const text = segments.map(s => s.text).join('').trim()
        const wer = computeWER(text, sample.expected_text)
        results.push({ file: sample.file, expected: sample.expected_text, got: text, wer })

        t.comment('[' + sample.file + '] expected=' + JSON.stringify(sample.expected_text) +
          ' got=' + JSON.stringify(text) + ' WER=' + (wer * 100).toFixed(1) + '%')
      }
    } finally {
      await bci.destroy()
    }
  }

  // With zero results this is NaN, which fails the threshold assertion below.
  const avgWER = results.reduce((sum, r) => sum + r.wer, 0) / results.length
  t.comment('Average WER: ' + (avgWER * 100).toFixed(1) + '% (n=' + results.length + ')')

  t.ok(results.length === manifest.samples.length, 'All manifest samples should have been evaluated')
  t.ok(typeof avgWER === 'number' && avgWER < 0.5, 'Average WER should be below 50%')
})
/**
 * Locate the integration-test fixtures.
 *
 * Reads `../fixtures/manifest.json` relative to this file when it exists and
 * falls back to an empty sample list otherwise.
 *
 * @returns {{ fixturesDir: string, manifest: object, getSamplePath: (filename: string) => string }}
 */
function getTestPaths () {
  const fixturesDir = path.join(__dirname, '..', 'fixtures')
  const manifestPath = path.join(fixturesDir, 'manifest.json')

  const manifest = fs.existsSync(manifestPath)
    ? JSON.parse(fs.readFileSync(manifestPath, 'utf8'))
    : { samples: [] }

  const getSamplePath = (filename) => path.join(fixturesDir, filename)

  return { fixturesDir, manifest, getSamplePath }
}

/**
 * Describe the host the tests run on.
 *
 * @returns {{ arch: string, platform: string, label: string }} values reported
 *   by `bare-os`, plus a `<platform>-<arch>` label
 */
function detectPlatform () {
  const bareOs = require('bare-os')
  const arch = bareOs.arch()
  const platform = bareOs.platform()
  const label = `${platform}-${arch}`
  return { arch, platform, label }
}
# Toolchain chain-loaded by the Linux overlay triplets (see
# VCPKG_CHAINLOAD_TOOLCHAIN_FILE in vcpkg/triplets/*-linux.cmake).
# Pins the compilers to clang 19 and then defers to vcpkg's stock Linux
# toolchain for everything else.
set(CMAKE_C_COMPILER "clang-19")
set(CMAKE_CXX_COMPILER "clang++-19")

# The stock toolchain is located through the VCPKG_ROOT environment variable.
# Fail with an explicit message when it is missing: otherwise the path below
# collapses to "/scripts/toolchains/linux.cmake" and the include() error does
# not say why.
if(NOT DEFINED ENV{VCPKG_ROOT} OR "$ENV{VCPKG_ROOT}" STREQUAL "")
  message(FATAL_ERROR
    "linux-clang.cmake: the VCPKG_ROOT environment variable is not set; "
    "it must point at the vcpkg checkout that provides "
    "scripts/toolchains/linux.cmake.")
endif()

include("$ENV{VCPKG_ROOT}/scripts/toolchains/linux.cmake")