diff --git a/.github/workflows/integration-mobile-test-bci-whispercpp.yml b/.github/workflows/integration-mobile-test-bci-whispercpp.yml new file mode 100644 index 0000000000..5ed68bdfb7 --- /dev/null +++ b/.github/workflows/integration-mobile-test-bci-whispercpp.yml @@ -0,0 +1,1333 @@ +name: "Mobile Integration Tests (BCI Whispercpp)" + +on: + workflow_call: + inputs: + ref: + description: "Git ref to checkout" + type: string + required: false + repository: + description: "Repository to checkout" + type: string + required: false + workdir: + description: "Working directory (optional)" + required: false + type: string + default: "packages/bci-whispercpp" + workflow_dispatch: + inputs: + ref: + description: "Git ref (branch/tag/SHA) to test - defaults to current branch" + type: string + required: false + version: + description: "NPM package version to test (default: latest)" + type: string + required: false + default: latest + workdir: + description: "Working directory (optional)" + required: false + type: string + default: "packages/bci-whispercpp" + +env: + NODE_VERSION: 'lts/*' + ADDON_NAME: '@qvac/bci-whispercpp' + PREBUILD_ARTIFACT_PREFIX: 'bci-whispercpp-' + TEST_FRAMEWORK_REF: 'main' + APP_BUNDLE_ID: 'io.tether.test.qvac' + +jobs: + build-and-test: + name: Build ${{ matrix.platform }} and Run E2E Tests + runs-on: ${{ matrix.runner }} + timeout-minutes: 120 + permissions: + contents: read + packages: read + strategy: + fail-fast: false + matrix: + include: + - platform: Android + os: ubuntu-24.04 + runner: ai-run-linux # Self-hosted runner to avoid Maven Central 403 issues + - platform: iOS + os: macos-14 + runner: macos-14 + + steps: + # Free up disk space on Ubuntu runner to prevent "No space left on device" errors + - name: Free up disk space + if: matrix.platform == 'Android' + run: | + echo "Disk space before cleanup:" + df -h + # Remove unnecessary software to free up disk space (|| true to handle self-hosted runners) + sudo rm -rf /usr/share/dotnet || 
true + sudo rm -rf /opt/ghc || true + sudo rm -rf /opt/hostedtoolcache/CodeQL || true + sudo rm -rf /opt/hostedtoolcache/go || true + sudo rm -rf /opt/hostedtoolcache/Python || true + sudo rm -rf /opt/hostedtoolcache/Ruby || true + sudo rm -rf /usr/local/lib/android/sdk/ndk || true + sudo rm -rf /usr/local/share/boost || true + sudo rm -rf /usr/share/swift || true + sudo docker image prune --all --force || true + # Clean APT cache + sudo apt-get clean || true + echo "Disk space after cleanup:" + df -h + + - name: Checkout addon repository + uses: actions/checkout@v6 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || github.ref }} + token: ${{ secrets.PAT_TOKEN }} + path: addon + fetch-depth: 0 + + - name: Checkout mobile test framework + uses: actions/checkout@v6 + with: + repository: tetherto/qvac-test-addon-mobile + ref: ${{ env.TEST_FRAMEWORK_REF }} + token: ${{ secrets.PAT_TOKEN }} + path: test-framework + fetch-depth: 0 + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Configure scoped registry for @qvac and @tetherto packages + env: + GPR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + GIT_PAT: ${{ secrets.PAT_TOKEN }} + run: | + echo "Configuring scoped registry for @tetherto and @qvac packages..." 
+ + # Configure addon registry (WORKDIR-aware) + cd "addon/${{ inputs.workdir }}" + cat > .npmrc < .npmrc </dev/null || echo "Warning: Failed to copy to android-ia32" + cp -r prebuilds/android-arm64 prebuilds/android-arm 2>/dev/null || echo "Warning: Failed to copy to android-arm" + cp -r prebuilds/android-arm64 prebuilds/android-x64 2>/dev/null || echo "Warning: Failed to copy to android-x64" + fi + + # Copy iOS prebuilds + if [ -d "prebuilds/ios-arm64" ]; then + cp -r prebuilds/ios-arm64 prebuilds/ios-arm64-simulator 2>/dev/null || echo "iOS simulator prebuilds already present" + cp -r prebuilds/ios-arm64 prebuilds/ios-x64-simulator 2>/dev/null || echo "iOS x64 simulator prebuilds already present" + fi + + echo "โœ… Mobile prebuilds prepared" + ls -la prebuilds/ + + - name: Download BCI model and fixtures into testAssets + working-directory: addon/${{ inputs.workdir }} + env: + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + run: | + mkdir -p test/mobile/testAssets + echo "Downloading BCI model and test fixtures..." + gh release download bci-test-assets-v0.1.0 \ + --repo sharmaraju352/qvac \ + --pattern "ggml-bci-windowed.bin" --dir test/mobile/testAssets/ \ + --clobber + gh release download bci-test-assets-v0.1.0 \ + --repo sharmaraju352/qvac \ + --pattern "bci-embedder.bin" --dir test/mobile/testAssets/ \ + --clobber + gh release download bci-test-assets-v0.1.0 \ + --repo sharmaraju352/qvac \ + --pattern "bci-test-fixtures.tar.gz" --dir /tmp \ + --clobber + tar xzf /tmp/bci-test-fixtures.tar.gz -C test/mobile/testAssets/ + echo "Test assets:" && ls -la test/mobile/testAssets/ + + - name: Remove desktop prebuilds to save disk space + working-directory: addon/${{ inputs.workdir }} + run: | + echo "Removing desktop prebuilds to save disk space (keeping Android + iOS)..." 
+ echo "Before cleanup:" + du -sh prebuilds/* 2>/dev/null || true + + # Remove desktop prebuilds only (not needed for mobile tests) + rm -rf prebuilds/darwin-* prebuilds/win32-* prebuilds/linux-* 2>/dev/null || true + + echo "After cleanup (Android + iOS only):" + du -sh prebuilds/* 2>/dev/null || true + df -h + + - name: Verify test files exist + working-directory: addon/${{ inputs.workdir }} + run: | + echo "Verifying addon has mobile tests..." + + if [ ! -d "test/mobile" ]; then + echo "โŒ ERROR: test/mobile directory not found!" + echo "" + echo "This workflow requires the addon to have mobile tests at:" + echo " test/mobile/" + echo "" + echo "Please create this directory with your test files." + echo "See qvac-test-addon-mobile README for test file format." + exit 1 + fi + + # Check for .cjs test files + CJS_COUNT=$(find test/mobile -name "*.cjs" -type f | wc -l) + if [ "$CJS_COUNT" -eq 0 ]; then + echo "โŒ ERROR: No .cjs test files found in test/mobile!" + exit 1 + fi + + echo "โœ… Mobile test files found:" + ls -la test/mobile/*.cjs + + # Check if testAssets exists + if [ -d "test/mobile/testAssets" ]; then + echo "" + echo "โœ… Test assets found:" + ls -lah test/mobile/testAssets/ + else + echo "" + echo "โ„น๏ธ No testAssets directory (this is optional)" + fi + + - name: Install Ninja build tool + if: matrix.platform == 'iOS' + run: | + echo "๐Ÿ“ฆ Installing Ninja build system..." + brew install ninja + ninja --version + echo "โœ… Ninja installed successfully" + + - name: Install addon dependencies + working-directory: addon/${{ inputs.workdir }} + run: | + echo "Installing addon dependencies..." + npm install + + - name: Pack addon + working-directory: addon/${{ inputs.workdir }} + run: | + echo "Packing addon..." 
+ mkdir -p dist + npm pack --pack-destination dist + + # Verify pack file exists + PACK_FILE=$(ls dist/*.tgz | head -1) + if [ -f "$PACK_FILE" ]; then + SIZE=$(du -h "$PACK_FILE" | cut -f1) + echo "โœ… Pack file created: $PACK_FILE (Size: $SIZE)" + else + echo "โŒ Pack file not found in dist/" + exit 1 + fi + + - name: Setup test framework dependencies + working-directory: ./test-framework + run: | + echo "Setting up mobile test framework..." + npm install + echo "โœ… Test framework dependencies installed" + + - name: Build test app with addon + working-directory: ./test-framework + run: | + echo "Building test app with addon..." + echo "This will:" + echo " 1. Install the addon package" + echo " 2. Extract test code from addon's test/mobile/ directory" + echo " 3. Auto-detect and order test files by dependencies" + echo " 4. Generate backend.cjs with test functions" + echo " 5. Generate e2e tests for each test function" + echo " 6. Copy testAssets if available" + echo " 7. Bundle the app" + echo "" + + ADDON_PATH="${GITHUB_WORKSPACE}/addon/${{ inputs.workdir }}" + npm run build "$ADDON_PATH" "$ADDON_PATH/test/mobile" + + echo "" + echo "โœ… Test app built successfully" + + # Verify critical files were generated + if [ ! -f "backend/backend.cjs" ]; then + echo "โŒ ERROR: backend/backend.cjs was not generated!" + exit 1 + fi + + if [ ! -f "e2e/tests/app.test.js" ]; then + echo "โŒ ERROR: e2e/tests/app.test.js was not generated!" + exit 1 + fi + + if [ ! -f "backend/app.bundle" ]; then + echo "โŒ ERROR: backend/app.bundle was not created!" 
+ exit 1 + fi + + echo "โœ… All required files generated successfully" + + # Show what tests were extracted + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "EXTRACTED TEST FUNCTIONS:" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + if [ -f "app/testConfig.js" ]; then + cat app/testConfig.js + fi + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + + - name: Display build summary + if: always() + working-directory: ./test-framework + run: | + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "๐Ÿ“Š BUILD SUMMARY" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + echo "" + echo "Platform: ${{ matrix.platform }}" + echo "Addon: ${{ env.ADDON_NAME }}" + echo "" + echo "Generated Files:" + echo " backend/backend.cjs: $([ -f backend/backend.cjs ] && echo 'โœ…' || echo 'โŒ')" + echo " backend/app.bundle: $([ -f backend/app.bundle ] && echo 'โœ…' || echo 'โŒ')" + echo " app/testConfig.js: $([ -f app/testConfig.js ] && echo 'โœ…' || echo 'โŒ')" + echo " app/assetManifest.js: $([ -f app/assetManifest.js ] && echo 'โœ…' || echo 'โŒ')" + echo " e2e/tests/app.test.js: $([ -f e2e/tests/app.test.js ] && echo 'โœ…' || echo 'โŒ')" + echo "" + echo "Test Assets:" + if [ -d "testAssets" ]; then + ASSET_COUNT=$(find testAssets -type f | wc -l) + echo " โœ… $ASSET_COUNT file(s) in testAssets/" + else + echo " โ„น๏ธ No testAssets (optional)" + fi + echo "" + echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + + # Android-specific steps + - name: Set up JDK 17 + if: 
matrix.platform == 'Android' + uses: actions/setup-java@v5 + with: + java-version: 17 + distribution: temurin + + - name: Setup Android SDK + if: matrix.platform == 'Android' + uses: android-actions/setup-android@v3 + + - name: Generate Android project + if: matrix.platform == 'Android' + working-directory: ./test-framework + run: | + echo "Generating Android project with Expo..." + npx expo prebuild --platform android --clean + + - name: Build Android APK + if: matrix.platform == 'Android' + id: build_apk + working-directory: ./test-framework + run: | + echo "Building Android APK for Device Farm..." + export JAVA_HOME=$JAVA_HOME_17_X64 + + # Bundle JavaScript + echo "Bundling JavaScript code..." + npm run bundle + + if [ $? -ne 0 ]; then + echo "โŒ Bundle failed" + exit 1 + fi + + echo "โœ… Bundle completed successfully" + + # Build RELEASE APK (not debug) to ensure JS bundle is included + # Debug builds skip bundling by default and try to connect to Metro + # Release builds embed the JS bundle in the APK + cd android + echo "Building APK with Gradle (RELEASE with embedded JS bundle)..." + ./gradlew assembleRelease \ + -PreactNativeArchitectures=arm64-v8a \ + --no-daemon \ + --no-build-cache \ + --stacktrace + cd .. + + # Find the APK (look for release) + APK_PATH=$(find android/app/build/outputs/apk -name "*.apk" | grep "release" | grep -v "unaligned" | head -1) + + if [ -f "$APK_PATH" ]; then + # Convert to absolute path + APK_ABSOLUTE_PATH="${GITHUB_WORKSPACE}/test-framework/$APK_PATH" + SIZE=$(du -h "$APK_PATH" | cut -f1) + echo "โœ… APK built successfully: $APK_PATH (Size: $SIZE)" + echo "apk_path=$APK_ABSOLUTE_PATH" >> $GITHUB_OUTPUT + echo "app_type=ANDROID_APP" >> $GITHUB_OUTPUT + echo "app_name=test-app-${{ matrix.platform }}.apk" >> $GITHUB_OUTPUT + + # Clean up build intermediates to free disk space + echo "Cleaning up build intermediates..." 
+ rm -rf android/app/build/intermediates + rm -rf android/.gradle + df -h + else + echo "โŒ APK file not found" + echo "Searching in android/app/build/outputs/apk:" + find android/app/build/outputs/apk -type f 2>/dev/null || echo "Directory not found" + exit 1 + fi + + # iOS-specific steps + - name: Set up Xcode version + if: matrix.platform == 'iOS' + run: | + echo "Available Xcode versions:" + ls /Applications | grep Xcode || echo "No Xcode apps found" + + echo "" + echo "Current Xcode (before switch):" + xcodebuild -version + + # React Native requires Xcode >= 16.1 + # Use Xcode 16.1 (has iOS 18.1 SDK which is stable and pre-installed) + if [ -d "/Applications/Xcode_16.1.app" ]; then + echo "" + echo "โœ… Switching to Xcode 16.1..." + sudo xcode-select -s /Applications/Xcode_16.1.app + elif [ -d "/Applications/Xcode_16.1.0.app" ]; then + echo "" + echo "โœ… Switching to Xcode 16.1.0..." + sudo xcode-select -s /Applications/Xcode_16.1.0.app + elif [ -d "/Applications/Xcode_16.2.app" ]; then + echo "" + echo "โš ๏ธ Using Xcode 16.2 (16.1 not found)..." 
+ sudo xcode-select -s /Applications/Xcode_16.2.app + else + echo "" + echo "โŒ ERROR: No suitable Xcode version found (need >= 16.1)" + exit 1 + fi + + echo "" + echo "Current Xcode (after switch):" + xcodebuild -version + + echo "" + echo "Available iOS SDKs:" + xcodebuild -showsdks | grep -i ios + + - name: Install CocoaPods + if: matrix.platform == 'iOS' + run: | + sudo gem install cocoapods + pod --version + + - name: Create Keychain and Import Certificate + if: matrix.platform == 'iOS' + env: + BUILD_CERTIFICATE_BASE64: ${{ secrets.TEST_APP_APPLE_DISTRIBUTION_CERTIFICATE }} + P12_PASSWORD: ${{ secrets.APPLE_P12_PASSWORD }} + BUILD_PROVISION_PROFILE_BASE64: ${{ secrets.TEST_APP_APPLE_PROVISIONING_PROFILE }} + KEYCHAIN_PASSWORD: ${{ secrets.APPLE_KEYCHAIN_PASSWORD }} + run: | + CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12 + PP_PATH=$RUNNER_TEMP/build_pp.mobileprovision + KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db + + echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o $CERTIFICATE_PATH + echo -n "$BUILD_PROVISION_PROFILE_BASE64" | base64 --decode -o $PP_PATH + + security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + security set-keychain-settings -lut 21600 $KEYCHAIN_PATH + security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + + security import $CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH + security set-key-partition-list -S apple-tool:,apple: -s -k "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + security list-keychain -d user -s $KEYCHAIN_PATH + + # Extract UUID first, then copy with UUID as filename + PP_UUID=$(/usr/libexec/PlistBuddy -c 'Print :UUID' /dev/stdin <<< $(security cms -D -i $PP_PATH)) + echo "PP_UUID=$PP_UUID" >> $GITHUB_ENV + echo "Provisioning Profile UUID: $PP_UUID" + + # Copy provisioning profile with UUID as filename + mkdir -p ~/Library/MobileDevice/Provisioning\ Profiles + cp $PP_PATH ~/Library/MobileDevice/Provisioning\ Profiles/$PP_UUID.mobileprovision + + security 
find-identity -p codesigning -v + + # Inspects the installed provisioning profile and derives EXPORT_METHOD (consumed later by the Export IPA step) + - name: Verify provisioning profile + if: matrix.platform == 'iOS' + run: | + echo "🔍 Verifying provisioning profile..." + echo "PP_UUID: $PP_UUID" + + PP_FILE=~/Library/MobileDevice/Provisioning\ Profiles/$PP_UUID.mobileprovision + if [ ! -f "$PP_FILE" ]; then + echo "❌ Provisioning profile file not found at: $PP_FILE" + ls -la ~/Library/MobileDevice/Provisioning\ Profiles/ + exit 1 + fi + + echo "📋 Provisioning Profile Details:" + # Decode the signed profile into a plain plist that PlistBuddy can query + security cms -D -i "$PP_FILE" > /tmp/profile.plist + + PROFILE_NAME=$(/usr/libexec/PlistBuddy -c "Print :Name" /tmp/profile.plist 2>/dev/null || echo "Unknown") + PROFILE_BUNDLE_ID=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:application-identifier" /tmp/profile.plist 2>/dev/null || echo "Unknown") + PROFILE_TEAM_ID=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:com.apple.developer.team-identifier" /tmp/profile.plist 2>/dev/null || echo "Unknown") + + # Detect profile type (Development, Ad Hoc, App Store, Enterprise) + # HAS_DEVICES must hold exactly "yes" or "no": PlistBuddy prints the device array to stdout, + # so its output is discarded here; otherwise the == "yes" comparisons below could never match + HAS_DEVICES=$(/usr/libexec/PlistBuddy -c "Print :ProvisionedDevices" /tmp/profile.plist >/dev/null 2>&1 && echo "yes" || echo "no") + PROVISIONS_ALL=$(/usr/libexec/PlistBuddy -c "Print :ProvisionsAllDevices" /tmp/profile.plist 2>/dev/null || echo "false") + HAS_GET_TASK_ALLOW=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:get-task-allow" /tmp/profile.plist 2>/dev/null || echo "false") + + # Device list with get-task-allow means Development; device list without it means Ad Hoc; + # no device list and no ProvisionsAllDevices flag falls through to App Store + if [[ "$PROVISIONS_ALL" == "true" ]]; then + PROFILE_TYPE="Enterprise" + EXPORT_METHOD="enterprise" + elif [[ "$HAS_DEVICES" == "yes" && "$HAS_GET_TASK_ALLOW" == "true" ]]; then + PROFILE_TYPE="Development" + EXPORT_METHOD="development" + elif [[ "$HAS_DEVICES" == "yes" && "$HAS_GET_TASK_ALLOW" == "false" ]]; then + PROFILE_TYPE="Ad Hoc" + EXPORT_METHOD="ad-hoc" + else + PROFILE_TYPE="App Store" + EXPORT_METHOD="app-store" + fi + + echo " Name: $PROFILE_NAME" + echo " Type: $PROFILE_TYPE" + echo " Export Method: $EXPORT_METHOD" + echo " Application ID: $PROFILE_BUNDLE_ID" + echo " Team 
ID: $PROFILE_TEAM_ID" + echo " Expected Bundle ID: ${{ env.APP_BUNDLE_ID }}" + + # Save export method for next step + echo "EXPORT_METHOD=$EXPORT_METHOD" >> $GITHUB_ENV + + # Extract just the bundle ID part (remove team prefix) + BUNDLE_ID_ONLY=$(echo "$PROFILE_BUNDLE_ID" | sed 's/^[^.]*\.//') + + if [[ "$BUNDLE_ID_ONLY" != "${{ env.APP_BUNDLE_ID }}" ]]; then + echo "" + echo "โŒ ERROR: Provisioning profile bundle ID mismatch!" + echo " Profile has: $BUNDLE_ID_ONLY" + echo " Expected: ${{ env.APP_BUNDLE_ID }}" + echo "" + echo "The provisioning profile was created for a different bundle identifier." + echo "Please create a new provisioning profile for: ${{ env.APP_BUNDLE_ID }}" + exit 1 + fi + + echo "โœ… Provisioning profile matches expected bundle ID" + + - name: Generate iOS project + if: matrix.platform == 'iOS' + working-directory: ./test-framework + run: | + echo "Generating iOS project with Expo..." + npx expo prebuild --platform ios --clean + + - name: Install iOS dependencies + if: matrix.platform == 'iOS' + working-directory: ./test-framework/ios + run: | + echo "Installing CocoaPods dependencies..." + pod install --repo-update + + - name: Build and Archive iOS App + if: matrix.platform == 'iOS' + id: build_ios + working-directory: ./test-framework + run: | + echo "Building iOS app for Device Farm..." + + # Bundle JavaScript first + echo "Bundling JavaScript code..." + npm run bundle + + if [ $? -ne 0 ]; then + echo "โŒ Bundle failed" + exit 1 + fi + + echo "โœ… Bundle completed successfully" + + # Get scheme name + cd ios + SCHEME_NAME=$(xcodebuild -list | grep -A 1 "Schemes:" | grep -v "Schemes:" | head -1 | xargs) + echo "Detected scheme: $SCHEME_NAME" + + # Debug: Check bundle identifier in project + echo "๐Ÿ” Checking project configuration..." 
+ BUNDLE_ID=$(xcodebuild -showBuildSettings -workspace $SCHEME_NAME.xcworkspace -scheme "$SCHEME_NAME" -configuration Release -destination "generic/platform=iOS" 2>/dev/null | grep PRODUCT_BUNDLE_IDENTIFIER | head -1 | awk '{print $3}') + echo "Bundle Identifier in project: $BUNDLE_ID" + + if [[ "$BUNDLE_ID" != "${{ env.APP_BUNDLE_ID }}" ]]; then + echo "โš ๏ธ Warning: Bundle ID mismatch in Xcode project!" + echo " Expected: ${{ env.APP_BUNDLE_ID }}" + echo " Found: $BUNDLE_ID" + fi + + # Debug: Check provisioning profile + echo "๐Ÿ” Provisioning profile UUID: $PP_UUID" + security cms -D -i ~/Library/MobileDevice/Provisioning\ Profiles/$PP_UUID.mobileprovision | grep -A 5 "application-identifier\|Name\|TeamIdentifier" | head -20 || echo "Could not read profile details" + + # Archive for iOS device + xcodebuild -workspace $SCHEME_NAME.xcworkspace \ + -scheme "$SCHEME_NAME" \ + -sdk iphoneos \ + -configuration Release \ + -destination "generic/platform=iOS" \ + -archivePath $RUNNER_TEMP/$SCHEME_NAME.xcarchive \ + CODE_SIGN_STYLE=Manual \ + PROVISIONING_PROFILE_SPECIFIER="$PP_UUID" \ + CODE_SIGN_IDENTITY="Apple Distribution" \ + DEVELOPMENT_TEAM="${{ secrets.APPLE_TEAM_ID }}" \ + clean archive + + - name: Export IPA + if: matrix.platform == 'iOS' + id: export_ipa + working-directory: ./test-framework/ios + run: | + SCHEME_NAME=$(xcodebuild -list | grep -A 1 "Schemes:" | grep -v "Schemes:" | head -1 | xargs) + + # Create export options using auto-detected export method + # The EXPORT_METHOD was determined in the "Verify provisioning profile" step + echo "๐Ÿ“ฆ Using export method: $EXPORT_METHOD" + + EXPORT_OPTS_PATH=$RUNNER_TEMP/ExportOptions.plist + cat > $EXPORT_OPTS_PATH << EOF + + + + + method + $EXPORT_METHOD + teamID + ${{ secrets.APPLE_TEAM_ID }} + signingStyle + manual + provisioningProfiles + + ${{ env.APP_BUNDLE_ID }} + $PP_UUID + + + + EOF + + echo "๐Ÿ“‹ Export options:" + cat $EXPORT_OPTS_PATH + + xcodebuild -exportArchive \ + -archivePath 
$RUNNER_TEMP/$SCHEME_NAME.xcarchive \ + -exportOptionsPlist $EXPORT_OPTS_PATH \ + -exportPath $RUNNER_TEMP/build + + IPA_FILE=$(find $RUNNER_TEMP/build -name "*.ipa" | head -1) + if [ -f "$IPA_FILE" ]; then + echo "✅ IPA exported: $IPA_FILE" + echo "apk_path=$IPA_FILE" >> $GITHUB_OUTPUT + echo "app_type=IOS_APP" >> $GITHUB_OUTPUT + echo "app_name=test-app-${{ matrix.platform }}.ipa" >> $GITHUB_OUTPUT + else + echo "❌ IPA file not found" + exit 1 + fi + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + # Registers the built app (APK or IPA) with Device Farm, uploads the file, then waits for processing + - name: Upload App to Device Farm + id: upload_app + run: | + # Both build steps publish the artifact path under the output name apk_path (the iOS step stores the IPA there) + if [ "${{ matrix.platform }}" == "Android" ]; then + APP_PATH="${{ steps.build_apk.outputs.apk_path }}" + APP_TYPE="${{ steps.build_apk.outputs.app_type }}" + APP_NAME="${{ steps.build_apk.outputs.app_name }}" + else + APP_PATH="${{ steps.export_ipa.outputs.apk_path }}" + APP_TYPE="${{ steps.export_ipa.outputs.app_type }}" + APP_NAME="${{ steps.export_ipa.outputs.app_name }}" + fi + + echo "📤 Uploading app to AWS Device Farm..." + # Test the command directly: GitHub runs bash with -e by default, so a separate $? check after a failed command is never reached + if ! UPLOAD_RESPONSE=$(aws devicefarm create-upload \ + --project-arn "${{ secrets.AWS_DEVICE_FARM_PROJECT_ARN_WHISPERCPP }}" \ + --name "$APP_NAME" \ + --type "$APP_TYPE" \ + --output json); then + echo "❌ Error creating upload in Device Farm" + echo "Response: $UPLOAD_RESPONSE" + exit 1 + fi + + # Quote the JSON so the shell does not word-split or glob-expand it before jq reads it + APP_UPLOAD_URL=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.url') + APP_UPLOAD_ARN=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.arn') + echo "app_upload_arn=$APP_UPLOAD_ARN" >> $GITHUB_OUTPUT + echo "App upload ARN: $APP_UPLOAD_ARN" + + echo "Uploading app file: $APP_PATH" + # --fail makes curl exit non-zero on an HTTP error response instead of reporting success + if ! curl --fail -T "$APP_PATH" "$APP_UPLOAD_URL"; then + echo "❌ Error uploading app file using curl" + exit 1 + fi + + # Wait for processing + echo "⏳ Waiting for upload to be processed..." 
+ # Poll the upload status every 10 seconds, at most MAX_ATTEMPTS times + MAX_ATTEMPTS=30 + ATTEMPT=1 + while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do + STATUS=$(aws devicefarm get-upload --arn "$APP_UPLOAD_ARN" --query "upload.status" --output text) + echo "Status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS" + + if [ "$STATUS" = "SUCCEEDED" ]; then + echo "✅ App upload successful" + break + fi + + if [ "$STATUS" = "FAILED" ]; then + echo "❌ Upload failed" + aws devicefarm get-upload --arn "$APP_UPLOAD_ARN" + exit 1 + fi + + sleep 10 + ATTEMPT=$((ATTEMPT + 1)) + done + + # ATTEMPT only exceeds MAX_ATTEMPTS when the loop ran out without a break, i.e. the upload never reached SUCCEEDED; + # fail here instead of letting the step pass with an unprocessed app + if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then + echo "❌ Timeout waiting for app upload processing" + exit 1 + fi + + # Sanity-checks the generated e2e package before it is zipped and uploaded + - name: Verify test package generation + working-directory: ./test-framework/e2e + run: | + echo "Verifying e2e test package..." + + if [ ! -f "package.json" ]; then + echo "❌ ERROR: e2e/package.json not found!" + exit 1 + fi + + if [ ! -f "tests/app.test.js" ]; then + echo "❌ ERROR: e2e/tests/app.test.js not found!" + exit 1 + fi + + echo "✅ E2E test files verified" + echo "" + echo "Test package contents:" + ls -la + echo "" + echo "Test files:" + ls -la tests/ + + - name: Package and Upload Test Package + id: upload_test_package + working-directory: ./test-framework + run: | + echo "📦 Packaging e2e tests..." + cd e2e + + # Install dependencies before packing + npm install + + # Create tarball + npm pack + + # Create zip with test files only (no node_modules - will be installed on Device Farm) + ZIP_NAME="e2e-tests-${{ matrix.platform }}.zip" + zip -r "$ZIP_NAME" \ + package.json \ + tests/ \ + *.tgz + + echo "📦 Package contents (excluding node_modules):" + unzip -l "$ZIP_NAME" | head -20 + + # Verify zip was created + if [ ! -f "$ZIP_NAME" ]; then + echo "❌ ERROR: Failed to create test package zip" + exit 1 + fi + + SIZE=$(du -h "$ZIP_NAME" | cut -f1) + echo "✅ Test package created: $ZIP_NAME (Size: $SIZE)" + + mv "$ZIP_NAME" "$GITHUB_WORKSPACE/" + + # Upload test package to AWS Device Farm + echo "📤 Uploading test package to AWS Device Farm..." 
+ # Test the command directly: GitHub runs bash with -e by default, so a separate $? check after a failed command is never reached + if ! UPLOAD_RESPONSE=$(aws devicefarm create-upload \ + --project-arn "${{ secrets.AWS_DEVICE_FARM_PROJECT_ARN_WHISPERCPP }}" \ + --name "$ZIP_NAME" \ + --type "APPIUM_NODE_TEST_PACKAGE" \ + --output json); then + echo "❌ Error creating test package upload in Device Farm" + echo "Response: $UPLOAD_RESPONSE" + exit 1 + fi + + # Quote the JSON so the shell does not word-split or glob-expand it before jq reads it + TEST_UPLOAD_URL=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.url') + TEST_UPLOAD_ARN=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.arn') + echo "test_package_upload_arn=$TEST_UPLOAD_ARN" >> $GITHUB_OUTPUT + echo "Test package upload ARN: $TEST_UPLOAD_ARN" + + # Log the local file rather than the upload URL, which grants write access to the upload slot - TODO confirm URL lifetime against the Device Farm docs + echo "Uploading test package: $GITHUB_WORKSPACE/$ZIP_NAME" + # --fail makes curl exit non-zero on an HTTP error response instead of reporting success + if ! curl --fail -T "$GITHUB_WORKSPACE/$ZIP_NAME" "$TEST_UPLOAD_URL"; then + echo "❌ Error uploading test package using curl" + exit 1 + fi + + # Wait for processing + echo "⏳ Waiting for test package to be processed..." + MAX_ATTEMPTS=30 + ATTEMPT=1 + while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do + STATUS=$(aws devicefarm get-upload --arn "$TEST_UPLOAD_ARN" --query "upload.status" --output text) + echo "Test package status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS" + + if [ "$STATUS" = "SUCCEEDED" ]; then + echo "✅ Test package upload successful" + break + fi + + if [ "$STATUS" = "FAILED" ]; then + echo "❌ Test package upload failed" + aws devicefarm get-upload --arn "$TEST_UPLOAD_ARN" + exit 1 + fi + + sleep 10 + ATTEMPT=$((ATTEMPT + 1)) + done + + # ATTEMPT only exceeds MAX_ATTEMPTS when the loop ended without reaching SUCCEEDED + if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then + echo "❌ Timeout waiting for test package processing" + exit 1 + fi + + - name: Create and Upload Test Spec + id: upload_test_spec + run: | + echo "📝 Creating test spec for custom environment mode..." 
+ echo "Platform: ${{ matrix.platform }}" + + # Create platform-specific test spec using printf for precise control + # NOTE: Both platforms use a 'before' hook in the wdio config to click the button + # This ensures a single Appium session for reliability (no session handoff issues) + # The before hook includes crash detection using queryAppState + if [ "${{ matrix.platform }}" == "Android" ]; then + PLATFORM="Android" + AUTOMATION="UiAutomator2" + HOST_LINE="android_test_host: amazon_linux_2" + BUNDLE_ID="${{ env.APP_BUNDLE_ID }}" + # Android wdio config with crash detection (bail:0 = continue on test failures, crash = process.exit) + # Timeout set to 15 minutes (900000ms) for audio transcription tests (whisper models can be slow) + WDIO_CONFIG='exports.config={runner:"local",hostname:"127.0.0.1",port:4723,path:"/wd/hub",specs:["*.spec.js","*.test.js"],maxInstances:1,bail:0,capabilities:[{platformName:"Android","appium:automationName":"UiAutomator2","appium:appPackage":"'${{ env.APP_BUNDLE_ID }}'","appium:appActivity":"'${{ env.APP_BUNDLE_ID }}'.MainActivity","appium:newCommandTimeout":300,"appium:autoGrantPermissions":true,"appium:autoAcceptAlerts":true,"appium:noReset":true,"appium:dontStopAppOnReset":true,"appium:forceAppLaunch":false}],logLevel:"debug",waitforTimeout:120000,connectionRetryTimeout:30000,connectionRetryCount:3,services:[],framework:"mocha",reporters:["spec"],mochaOpts:{ui:"bdd",timeout:900000},before:async function(capabilities,specs,browser){const BUNDLE_ID="'${{ env.APP_BUNDLE_ID }}'";global.appCrashed=false;global.checkAppCrash=async(stage)=>{try{const state=await browser.queryAppState(BUNDLE_ID);console.log("["+stage+"] App state: "+state+" (4=foreground,3=background,1=not running)");if(state<3){console.error("\\n๐Ÿ›‘ APP CRASHED at "+stage+"! 
State="+state);console.error("Check device logs for BareKit/native errors.\\n");global.appCrashed=true;process.exit(1);}return state;}catch(e){console.log("["+stage+"] queryAppState error: "+e.message);return-1;}};console.log("Checking initial app state...");await global.checkAppCrash("startup");console.log("Waiting for app to initialize...");await browser.pause(5000);await global.checkAppCrash("after-pause");const initText=await browser.$("android=new UiSelector().textContains(\"INITIALIZED\")");await initText.waitForDisplayed({timeout:60000});await global.checkAppCrash("after-init");console.log("App initialized, clicking Run Automated Tests...");const button=await browser.$("android=new UiSelector().textContains(\"Run Automated Tests\")");await button.waitForDisplayed({timeout:15000});await button.click();console.log("Button clicked!");await browser.pause(5000);await global.checkAppCrash("after-click");},afterTest:async function(test,context,{error}){if(global.appCrashed)return;await global.checkAppCrash("after-test:"+test.title);}};' + else + PLATFORM="iOS" + AUTOMATION="XCUITest" + # iOS 18+ requires macos_sequoia test host (supports iOS 15-26) + HOST_LINE="ios_test_host: macos_sequoia" + BUNDLE_ID="${{ env.APP_BUNDLE_ID }}" + # iOS wdio config with crash detection (bail:0 = continue on test failures, crash = process.exit) + # usePrebuiltWDA uses Device Farm's pre-built WebDriverAgent + # Timeout set to 15 minutes (900000ms) for audio transcription tests (whisper models can be slow) + WDIO_CONFIG='exports.config={runner:"local",hostname:"127.0.0.1",port:4723,path:"/wd/hub",specs:["*.spec.js","*.test.js"],maxInstances:1,bail:0,capabilities:[{platformName:"iOS","appium:automationName":"XCUITest","appium:bundleId":"'${{ env.APP_BUNDLE_ID 
}}'","appium:newCommandTimeout":300,"appium:noReset":true,"appium:forceAppLaunch":false,"appium:usePrebuiltWDA":true,"appium:wdaLocalPort":8100,"appium:showIOSLog":true,"appium:realDeviceLogger":"/usr/local/lib/node_modules/appium/node_modules/deviceconsole/deviceconsole"}],logLevel:"debug",waitforTimeout:120000,connectionRetryTimeout:30000,connectionRetryCount:3,services:[],framework:"mocha",reporters:["spec"],mochaOpts:{ui:"bdd",timeout:900000},before:async function(capabilities,specs,browser){const BUNDLE_ID="'${{ env.APP_BUNDLE_ID }}'";global.appCrashed=false;global.checkAppCrash=async(stage)=>{try{const state=await browser.queryAppState(BUNDLE_ID);console.log("["+stage+"] App state: "+state+" (4=foreground,3=background,1=not running)");if(state<3){console.error("\\n๐Ÿ›‘ APP CRASHED at "+stage+"! State="+state);console.error("Check device logs for BareKit/native errors.\\n");global.appCrashed=true;process.exit(1);}return state;}catch(e){console.log("["+stage+"] queryAppState error: "+e.message);return-1;}};console.log("Checking initial app state...");await global.checkAppCrash("startup");console.log("Waiting for app to initialize...");await browser.pause(5000);await global.checkAppCrash("after-pause");const initText=await browser.$("-ios predicate string:label CONTAINS \"INITIALIZED\"");await initText.waitForDisplayed({timeout:60000});await global.checkAppCrash("after-init");console.log("App initialized, clicking Run Automated Tests...");const button=await browser.$("-ios predicate string:label CONTAINS \"Run Automated Tests\"");await button.waitForDisplayed({timeout:15000});await button.click();console.log("Button clicked!");await browser.pause(5000);await global.checkAppCrash("after-click");},afterTest:async function(test,context,{error}){if(global.appCrashed)return;await global.checkAppCrash("after-test:"+test.title);}};' + fi + + # Base64 encode the wdio config to safely embed in YAML + # Note: macOS base64 doesn't support -w flag (no line wrapping by 
default) + WDIO_CONFIG_B64=$(echo "$WDIO_CONFIG" | base64 | tr -d '\n') + + # Create test spec YAML using printf to avoid variable expansion issues + { + printf 'version: 0.1\n' + if [ -n "$HOST_LINE" ]; then + printf '%s\n' "$HOST_LINE" + fi + printf '\n' + printf 'phases:\n' + printf ' install:\n' + printf ' commands:\n' + printf ' - echo "Setting up Node.js environment..."\n' + printf ' - export NVM_DIR=$HOME/.nvm\n' + printf ' - . $NVM_DIR/nvm.sh 2>/dev/null || true\n' + printf ' - nvm install 18 2>/dev/null || true\n' + printf ' - nvm use 18 2>/dev/null || true\n' + printf ' - node --version || echo "Using system node"\n' + printf '\n' + printf ' pre_test:\n' + printf ' commands:\n' + printf ' - echo "Setting up test environment..."\n' + printf ' - cd $DEVICEFARM_TEST_PACKAGE_PATH\n' + printf ' - ls -la\n' + printf ' - echo "Installing dependencies (clean install)..."\n' + printf ' - rm -rf node_modules package-lock.json 2>/dev/null || true\n' + printf ' - npm install --legacy-peer-deps 2>&1\n' + printf ' - echo "Verifying wdio installation..."\n' + printf ' - ls -la node_modules/.bin/ | grep wdio || echo "wdio not found in .bin"\n' + printf ' - node node_modules/@wdio/cli/bin/wdio.js --version || echo "wdio version check failed"\n' + printf ' - echo "Creating wdio config for Device Farm..."\n' + printf ' - echo "%s" | base64 -d > tests/wdio.config.devicefarm.js\n' "$WDIO_CONFIG_B64" + printf ' - cat tests/wdio.config.devicefarm.js\n' + + # iOS-specific WebDriverAgent configuration (only for iOS platform) + if [ "${{ matrix.platform }}" == "iOS" ]; then + printf ' - echo "๐Ÿ”ง Configuring WebDriverAgent for iOS..."\n' + printf ' - export DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH=$DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH_V9\n' + printf ' - echo "WDA Path: $DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH"\n' + fi + + printf ' - echo "๐Ÿš€ Starting Appium server..."\n' + printf ' - export APPIUM_BASE_PATH=/wd/hub\n' + printf ' - |\n' + printf ' appium 
--base-path=$APPIUM_BASE_PATH --log-timestamp \\\n' + printf ' --log-no-colors --relaxed-security --default-capabilities \\\n' + printf ' "{\\"appium:deviceName\\": \\"$DEVICEFARM_DEVICE_NAME\\", \\\n' + printf ' \\"platformName\\": \\"$DEVICEFARM_DEVICE_PLATFORM_NAME\\", \\\n' + printf ' \\"appium:app\\": \\"$DEVICEFARM_APP_PATH\\", \\\n' + printf ' \\"appium:udid\\":\\"$DEVICEFARM_DEVICE_UDID\\", \\\n' + printf ' \\"appium:platformVersion\\": \\"$DEVICEFARM_DEVICE_OS_VERSION\\", \\\n' + printf ' \\"appium:chromedriverExecutableDir\\": \\"$DEVICEFARM_CHROMEDRIVER_EXECUTABLE_DIR\\", \\\n' + printf ' \\"appium:wdaLocalPort\\": 8100, \\\n' + printf ' \\"appium:derivedDataPath\\": \\"$DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH\\", \\\n' + printf ' \\"appium:usePrebuiltWDA\\": true, \\\n' + printf ' \\"appium:automationName\\": \\"%s\\"}" \\\n' "$AUTOMATION" + printf ' >> $DEVICEFARM_LOG_DIR/appium.log 2>&1 &\n' + printf ' - echo "โณ Waiting for Appium to be ready (max 30 seconds)..."\n' + printf ' - |\n' + printf ' appium_initialization_time=0\n' + printf ' until curl --silent --fail "http://0.0.0.0:4723${APPIUM_BASE_PATH}/status"; do\n' + printf ' if [[ $appium_initialization_time -gt 30 ]]; then\n' + printf ' echo "โŒ Appium did not start within 30 seconds. 
Exiting..."\n' + printf ' cat $DEVICEFARM_LOG_DIR/appium.log\n' + printf ' exit 1\n' + printf ' fi\n' + printf ' appium_initialization_time=$((appium_initialization_time + 1))\n' + printf ' echo "Waiting for Appium to start on port 4723 (${appium_initialization_time}s/30s)..."\n' + printf ' sleep 1\n' + printf ' done\n' + printf ' - echo "โœ… Appium server is ready!"\n' + printf ' - curl -s http://0.0.0.0:4723${APPIUM_BASE_PATH}/status || echo "Status check failed"\n' + printf ' - echo "โ„น๏ธ Button click handled via WebDriverIO before hook (single session)"\n' + printf '\n' + printf ' test:\n' + printf ' commands:\n' + printf ' - echo "๐Ÿงช Running WebDriverIO tests..."\n' + printf ' - cd $DEVICEFARM_TEST_PACKAGE_PATH\n' + printf ' - echo "Verifying Appium is still running..."\n' + printf ' - ps aux | grep appium | grep -v grep || echo "โš ๏ธ Appium process not found"\n' + printf ' - curl -s http://127.0.0.1:4723/wd/hub/status || echo "โš ๏ธ Appium status check failed"\n' + printf ' - echo "Starting wdio test execution..."\n' + printf ' - node node_modules/@wdio/cli/bin/wdio.js run tests/wdio.config.devicefarm.js\n' + printf '\n' + printf ' post_test:\n' + printf ' commands:\n' + printf ' - echo "Test completed"\n' + + # iOS-specific: Output captured device logs + if [ "${{ matrix.platform }}" == "iOS" ]; then + printf ' - echo ""\n' + printf ' - echo "๐Ÿ“ฑ ========== iOS Device Console Logs =========="\n' + printf ' - |\n' + printf ' if [ -f "$DEVICEFARM_LOG_DIR/device_console.log" ]; then\n' + printf ' echo "Device console log found, showing whisper output:"\n' + printf ' grep -i "bare\|console\|whisper\|transcription\|audio\|test\|error" "$DEVICEFARM_LOG_DIR/device_console.log" || echo "No matching logs found"\n' + printf ' else\n' + printf ' echo "No device_console.log file found"\n' + printf ' fi\n' + printf ' - echo ""\n' + printf ' - echo "๐Ÿ“‹ Available log files:"\n' + printf ' - ls -lh $DEVICEFARM_LOG_DIR/ || echo "Log directory not accessible"\n' + 
fi
+
+            printf '\n'
+            printf 'artifacts:\n'
+            printf ' - $DEVICEFARM_LOG_DIR\n'
+          } > testspec.yml
+
+          echo "Generated test spec:"
+          echo "===================="
+          cat testspec.yml
+          echo "===================="
+
+          echo "📤 Uploading test spec to Device Farm..."
+          SPEC_RESPONSE=$(aws devicefarm create-upload \
+            --project-arn "${{ secrets.AWS_DEVICE_FARM_PROJECT_ARN_WHISPERCPP }}" \
+            --name "testspec.yml" \
+            --type "APPIUM_NODE_TEST_SPEC" \
+            --output json)
+
+          # Quoted so the JSON reaches jq without word splitting or globbing.
+          SPEC_UPLOAD_URL=$(echo "$SPEC_RESPONSE" | jq -r '.upload.url')
+          SPEC_UPLOAD_ARN=$(echo "$SPEC_RESPONSE" | jq -r '.upload.arn')
+          echo "test_spec_arn=$SPEC_UPLOAD_ARN" >> "$GITHUB_OUTPUT"
+
+          curl -T testspec.yml "$SPEC_UPLOAD_URL"
+
+          # Poll until Device Farm reports the uploaded spec as processed.
+          echo "⏳ Waiting for test spec to be processed..."
+          MAX_ATTEMPTS=20
+          ATTEMPT=1
+          STATUS=""
+          while [ "$ATTEMPT" -le "$MAX_ATTEMPTS" ]; do
+            STATUS=$(aws devicefarm get-upload --arn "$SPEC_UPLOAD_ARN" --query "upload.status" --output text)
+            echo "Test spec status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS"
+
+            if [ "$STATUS" = "SUCCEEDED" ]; then
+              echo "✅ Test spec upload successful"
+              break
+            fi
+
+            if [ "$STATUS" = "FAILED" ]; then
+              echo "❌ Test spec upload failed"
+              aws devicefarm get-upload --arn "$SPEC_UPLOAD_ARN"
+              exit 1
+            fi
+
+            sleep 5
+            ATTEMPT=$((ATTEMPT + 1))
+          done
+
+          # The loop also ends when the attempts run out; fail here instead of
+          # letting a later step schedule a run with an unprocessed spec.
+          if [ "$STATUS" != "SUCCEEDED" ]; then
+            echo "❌ Test spec was not processed after $MAX_ATTEMPTS attempts (last status: $STATUS)"
+            exit 1
+          fi
+
+      # Schedules one Device Farm run for the current matrix platform and
+      # exposes its ARN as the `run_arn` step output.
+      - name: Schedule Device Farm Test Run
+        id: schedule_run
+        run: |
+          if [ "${{ matrix.platform }}" == "Android" ]; then
+            POOL_ARN="${{ secrets.ANDROID_DEVICE_POOL_ARN_WHISPERCPP }}"
+          else
+            POOL_ARN="${{ secrets.IOS_DEVICE_POOL_ARN_WHISPERCPP }}"
+          fi
+
+          # Set run name based on trigger
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            RUN_NAME="Manual-${{ github.run_number }}-${{ matrix.platform }}"
+          else
+            RUN_NAME="PR-${{ github.event.pull_request.number || github.run_number }}-${{ matrix.platform }}"
+          fi
+
+          echo "🚀 Scheduling Device Farm test run..."
+          echo "Platform: ${{ matrix.platform }}"
+          echo "Device Pool ARN: $POOL_ARN"
+          echo "Run Name: $RUN_NAME"
+
+          RUN_ARN=$(aws devicefarm schedule-run \
+            --project-arn "${{ secrets.AWS_DEVICE_FARM_PROJECT_ARN_WHISPERCPP }}" \
+            --device-pool-arn "$POOL_ARN" \
+            --app-arn "${{ steps.upload_app.outputs.app_upload_arn }}" \
+            --name "$RUN_NAME" \
+            --test type=APPIUM_NODE,testPackageArn="${{ steps.upload_test_package.outputs.test_package_upload_arn }}",testSpecArn="${{ steps.upload_test_spec.outputs.test_spec_arn }}" \
+            --query 'run.arn' --output text)
+
+          # `--output text` prints "None" when the queried field is absent.
+          if [ -z "$RUN_ARN" ] || [ "$RUN_ARN" = "None" ]; then
+            echo "❌ Device Farm did not return a run ARN"
+            exit 1
+          fi
+
+          echo "run_arn=$RUN_ARN" >> "$GITHUB_OUTPUT"
+          echo "✅ Test run scheduled: $RUN_ARN"
+
+      # Polls the scheduled run until it reports COMPLETED (or the wait budget
+      # is spent), prints a per-device summary, writes the counters to the step
+      # outputs and exits non-zero unless the run and every device job PASSED.
+      - name: Monitor Test Run
+        id: monitor_run
+        run: |
+          RUN_ARN="${{ steps.schedule_run.outputs.run_arn }}"
+          echo "📊 Monitoring test run: $RUN_ARN"
+          echo ""
+
+          MAX_WAIT_TIME=5400 # 90 minutes (whisper transcription can take longer)
+          ELAPSED=0
+          # Horizontal rule reused by every section banner below.
+          RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+          while true; do
+            # One get-run call per poll, so status and result come from the same snapshot.
+            RUN_DETAILS=$(aws devicefarm get-run --arn "$RUN_ARN" --output json)
+            STATUS=$(echo "$RUN_DETAILS" | jq -r '.run.status')
+            RESULT=$(echo "$RUN_DETAILS" | jq -r '.run.result')
+
+            echo "⏳ Run status: $STATUS (Result: $RESULT) - Elapsed: ${ELAPSED}s"
+
+            if [[ "$STATUS" == "COMPLETED" ]]; then
+              echo ""
+              echo "✅ Test run completed!"
+              break
+            fi
+
+            if [ "$ELAPSED" -ge "$MAX_WAIT_TIME" ]; then
+              echo ""
+              echo "❌ Timeout: Test run exceeded $MAX_WAIT_TIME seconds"
+              # Best effort: ask Device Farm to stop the run we are abandoning.
+              aws devicefarm stop-run --arn "$RUN_ARN" > /dev/null || true
+              exit 1
+            fi
+
+            sleep 30
+            ELAPSED=$((ELAPSED + 30))
+          done
+
+          # RUN_DETAILS still holds the poll that reported COMPLETED.
+          COUNTERS=$(echo "$RUN_DETAILS" | jq -r '.run.counters')
+
+          echo ""
+          echo "$RULE"
+          echo "📊 FINAL TEST RESULTS"
+          echo "$RULE"
+          echo "Result: $RESULT"
+          echo ""
+
+          # Get jobs (one per device) for the per-device breakdown
+          echo "📱 Fetching detailed test results..."
+          JOBS=$(aws devicefarm list-jobs --arn "$RUN_ARN" --output json)
+
+          echo ""
+          echo "$RULE"
+          echo "📋 YOUR TESTS (excluding Setup/Teardown)"
+          echo "$RULE"
+          echo ""
+
+          DEVICE_COUNT=0
+          USER_TEST_COUNT=0
+          USER_PASSED=0
+          USER_FAILED=0
+          FAILED_TEST_DETAILS=()
+
+          # Extract region, project ID and run ID from RUN_ARN for console links
+          # RUN_ARN format: arn:aws:devicefarm:REGION:ACCOUNT:run:PROJECT_ID/RUN_ID
+          REGION=$(echo "$RUN_ARN" | cut -d: -f4)
+          PROJECT_ID=$(echo "$RUN_ARN" | sed -n 's/.*:run:\([^/]*\)\/.*/\1/p')
+          RUN_ID=$(echo "$RUN_ARN" | sed -n 's/.*:run:[^/]*\/\(.*\)/\1/p')
+          # Console link for the run (no region param needed when region is in subdomain)
+          RUN_LINK="https://${REGION}.console.aws.amazon.com/devicefarm/home#/mobile/projects/${PROJECT_ID}/runs/${RUN_ID}"
+
+          # Process each device/job
+          for JOB_ARN in $(echo "$JOBS" | jq -r '.jobs[].arn'); do
+            DEVICE_COUNT=$((DEVICE_COUNT + 1))
+            JOB_DETAILS=$(aws devicefarm get-job --arn "$JOB_ARN" --output json)
+            DEVICE_NAME=$(echo "$JOB_DETAILS" | jq -r '.job.device.name // "Unknown Device"')
+            JOB_RESULT=$(echo "$JOB_DETAILS" | jq -r '.job.result // "UNKNOWN"')
+            JOB_ID=$(echo "$JOB_ARN" | sed -n 's/.*:job:[^/]*\/[^/]*\/\(.*\)/\1/p')
+
+            CONSOLE_LINK="${RUN_LINK}/jobs/${JOB_ID}"
+
+            if [ "$JOB_RESULT" = "PASSED" ]; then
+              echo " ✅ $DEVICE_NAME: PASSED"
+              USER_PASSED=$((USER_PASSED + 1))
+            else
+              echo " ❌ $DEVICE_NAME: $JOB_RESULT"
+              USER_FAILED=$((USER_FAILED + 1))
+              FAILED_TEST_DETAILS+=("❌ $DEVICE_NAME: $JOB_RESULT")
+              FAILED_TEST_DETAILS+=(" 📎 View logs: $CONSOLE_LINK")
+            fi
+
+            USER_TEST_COUNT=$((USER_TEST_COUNT + 1))
+            echo ""
+          done
+
+          # Show AWS Device Farm console link for the entire run
+          echo ""
+          echo "$RULE"
+          echo "🔗 AWS DEVICE FARM LINKS"
+          echo "$RULE"
+          echo ""
+          echo "📊 Full Run Details:"
+          echo " ${RUN_LINK}"
+          echo ""
+          echo "💡 Tip: Click the link above, then select a device to view:"
+          echo " • Video recording of the test"
+          echo " • Screenshots"
+          echo " • Device logs"
+          echo " • Test spec output (shows individual test results)"
+          echo ""
+
+          # Summary
+          echo "$RULE"
+          echo "📊 SUMMARY"
+          echo "$RULE"
+          echo ""
+          echo "Devices tested: $DEVICE_COUNT"
+          echo " ✅ Passed: $USER_PASSED"
+          echo " ❌ Failed: $USER_FAILED"
+          echo ""
+          echo "📋 What these tests verify:"
+          echo " The E2E tests run on Device Farm check that your app:"
+          echo " 1. Shows 'INITIALIZED' after startup"
+          echo " 2. Runs all test functions from test/mobile/*.cjs"
+          echo " 3. Reports PASS/FAIL for each test function"
+          echo ""
+          echo "💡 If a test times out but the video shows PASS:"
+          echo " → The app test passed, but E2E gave up waiting too early"
+          echo " → Check timeout settings in qvac-test-addon-mobile"
+          echo ""
+          echo "Device Farm Counters (includes Setup/Teardown):"
+          echo "$COUNTERS" | jq '.'
+          echo ""
+
+          if [ "${#FAILED_TEST_DETAILS[@]}" -gt 0 ]; then
+            echo "$RULE"
+            echo "❌ FAILED TESTS"
+            echo "$RULE"
+            for failed_info in "${FAILED_TEST_DETAILS[@]}"; do
+              echo "$failed_info"
+            done
+            echo ""
+          fi
+
+          # Save for PR comment
+          echo "test_result=$RESULT" >> "$GITHUB_OUTPUT"
+          # Multiline value: name<<DELIMITER, the value lines, then DELIMITER.
+          echo "test_counters<<EOF" >> "$GITHUB_OUTPUT"
+          echo "$COUNTERS" >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
+
+          # Extract test counts
+          TOTAL=$(echo "$COUNTERS" | jq -r '.total // 0')
+          PASSED=$(echo "$COUNTERS" | jq -r '.passed // 0')
+          FAILED=$(echo "$COUNTERS" | jq -r '.failed // 0')
+          SKIPPED=$(echo "$COUNTERS" | jq -r '.skipped // 0')
+
+          echo "test_total=$TOTAL" >> "$GITHUB_OUTPUT"
+          echo "test_passed=$PASSED" >> "$GITHUB_OUTPUT"
+          echo "test_failed=$FAILED" >> "$GITHUB_OUTPUT"
+          echo "test_skipped=$SKIPPED" >> "$GITHUB_OUTPUT"
+
+          # Also save user test counts
+          echo "user_test_count=$USER_TEST_COUNT" >> "$GITHUB_OUTPUT"
+          echo "user_test_passed=$USER_PASSED" >> "$GITHUB_OUTPUT"
+          echo "user_test_failed=$USER_FAILED" >> "$GITHUB_OUTPUT"
+
+          # Determine if tests passed or failed
+          # Red status (exit 1) if:
+          # 1. Device Farm overall result is not PASSED, OR
+          # 2. Any of your tests failed
+          # Green status (exit 0) only if all tests passed
+
+          if [[ "$RESULT" != "PASSED" ]] || [ "$USER_FAILED" -gt 0 ]; then
+            echo ""
+            echo "❌ Device Farm tests failed"
+            if [[ "$RESULT" != "PASSED" ]]; then
+              echo " Device Farm result: $RESULT"
+            fi
+            echo " Your tests: $USER_PASSED passed, $USER_FAILED failed (out of $USER_TEST_COUNT total)"
+            echo " Device Farm total: $TOTAL | Passed: $PASSED | Failed: $FAILED | Skipped: $SKIPPED"
+            exit 1
+          fi
+
+          echo ""
+          echo "✅ All Device Farm tests passed!"
+          echo " Your tests: $USER_PASSED passed (out of $USER_TEST_COUNT total)"
+          echo " Device Farm total: $TOTAL | Passed: $PASSED | Failed: $FAILED | Skipped: $SKIPPED"
diff --git a/.github/workflows/integration-test-bci-whispercpp.yml b/.github/workflows/integration-test-bci-whispercpp.yml
new file mode 100644
index 0000000000..d1a49cff70
--- /dev/null
+++ b/.github/workflows/integration-test-bci-whispercpp.yml
@@ -0,0 +1,266 @@
+name: "Integration Tests (BCI Whispercpp)"
+
+on:
+  workflow_dispatch:
+    inputs:
+      prebuild_package:
+        description: "NPM package containing prebuilds (e.g.
@qvac/bci-whispercpp@0.1.0)" + required: false + type: string + workflow_call: + inputs: + ref: + description: "ref" + type: string + required: false + repository: + type: string + required: false + default: "tetherto/qvac" + workdir: + description: "Working directory inside the repo (monorepo package path)" + type: string + required: false + default: "packages/bci-whispercpp" + +env: + PKG_DIR: packages/bci-whispercpp + +jobs: + run-integration-tests: + timeout-minutes: 60 + continue-on-error: true + runs-on: ${{ matrix.os }} + name: test-${{ matrix.platform }}-${{ matrix.arch }} + + permissions: + contents: read + packages: read + + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + platform: linux + arch: x64 + - os: macos-15-xlarge + platform: darwin + arch: arm64 + - os: macos-15-large + platform: darwin + arch: x64 + - os: windows-2022 + platform: win32 + arch: x64 + + steps: + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: lts/* + + - name: Windows - enable git long paths + if: ${{ matrix.platform == 'win32' }} + shell: powershell + run: git config --system core.longpaths true + + - name: Checkout code + uses: actions/checkout@v6 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || github.ref }} + token: ${{ secrets.PAT_TOKEN }} + + - name: Configure scoped registry (Unix) + if: ${{ matrix.platform != 'win32' }} + working-directory: ${{ inputs.workdir || env.PKG_DIR }} + env: + GPR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + GIT_PAT: ${{ secrets.PAT_TOKEN }} + shell: bash + run: | + set -eu + cat > .npmrc <$null + continue-on-error: true + + - name: Run integration tests (Unix) + if: ${{ matrix.platform != 'win32' }} + working-directory: ${{ inputs.workdir || env.PKG_DIR }} + shell: bash + run: npm run test:integration + env: + WHISPER_MODEL_PATH: models/ggml-bci-windowed.bin + GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} + + - name: Run 
integration tests (Windows) + if: ${{ matrix.platform == 'win32' }} + working-directory: ${{ inputs.workdir || env.PKG_DIR }} + shell: powershell + run: npm run test:integration + env: + WHISPER_MODEL_PATH: models/ggml-bci-windowed.bin + GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} diff --git a/.github/workflows/prebuilds-bci-whispercpp.yml b/.github/workflows/prebuilds-bci-whispercpp.yml new file mode 100644 index 0000000000..cc77685b85 --- /dev/null +++ b/.github/workflows/prebuilds-bci-whispercpp.yml @@ -0,0 +1,335 @@ +name: Prebuilds (BCI Whispercpp) + +on: + push: + branches: + - feat/bci-whispercpp + paths: + - ".github/workflows/prebuilds-bci-whispercpp.yml" + - ".github/workflows/integration-test-bci-whispercpp.yml" + - ".github/workflows/integration-mobile-test-bci-whispercpp.yml" + - "packages/bci-whispercpp/**" + workflow_dispatch: + inputs: + workdir: + description: "Working directory" + required: false + default: "packages/bci-whispercpp" + type: string + workflow_call: + inputs: + ref: + description: "ref" + type: string + repository: + type: string + required: false + default: "tetherto/qvac" + workdir: + description: "Working directory" + type: string + required: false + default: "packages/bci-whispercpp" + +jobs: + prebuild: + permissions: + contents: write + pull-requests: write + packages: write + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + platform: linux + arch: x64 + - os: ubuntu-24.04 + platform: android + arch: arm64 + flags: -D ANDROID_STL=c++_shared + - os: macos-14 + platform: ios + arch: arm64 + - os: macos-14 + platform: ios + arch: arm64 + tags: -simulator + flags: --simulator + - os: macos-14 + platform: ios + arch: x64 + tags: -simulator + flags: --simulator + - os: macos-14 + platform: darwin + arch: arm64 + - os: macos-15 + platform: darwin + arch: x64 + - os: windows-2022 + platform: win32 + arch: x64 + + runs-on: ${{ matrix.os }} + name: ${{ matrix.platform }}-${{ matrix.arch }}${{ matrix.tags }} + + env: + 
WORKDIR: ${{ inputs.workdir || 'packages/bci-whispercpp' }} + VCPKG_BINARY_SOURCES: "clear;files,${{ github.workspace }}/${{ inputs.workdir || 'packages/bci-whispercpp' }}/vcpkg/cache,readwrite" + VCPKG_BUILD_TYPE: release + VCPKG_KEEP_ENV_VARS: GIT_CONFIG_GLOBAL + + steps: + - if: ${{ matrix.platform == 'android' }} + name: Select NDK + run: | + echo "ANDROID_NDK=$ANDROID_NDK_LATEST_HOME" >> $GITHUB_ENV + echo "ANDROID_NDK_HOME=$ANDROID_NDK_LATEST_HOME" >> $GITHUB_ENV + echo "ANDROID_NDK_ROOT=$ANDROID_NDK_LATEST_HOME" >> $GITHUB_ENV + + - if: ${{ startsWith(matrix.os, 'ubuntu') }} + name: Maximize build space + run: | + sudo docker image prune --all --force + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /opt/ghc + sudo rm -rf /usr/share/dotnet + + - if: ${{ matrix.os == 'windows-2022' }} + name: Configure windows runner + run: | + git config --system core.longpaths true + $ccacheVersion = "4.10.2" + $ccacheUrl = "https://github.com/ccache/ccache/releases/download/v$ccacheVersion/ccache-$ccacheVersion-windows-x86_64.zip" + $ccacheZip = "$env:TEMP\ccache.zip" + $ccacheDir = "C:\ccache" + Invoke-WebRequest -Uri $ccacheUrl -OutFile $ccacheZip + Expand-Archive -Path $ccacheZip -DestinationPath $ccacheDir -Force + $ccacheBin = Get-ChildItem -Path $ccacheDir -Recurse -Filter "ccache.exe" | Select-Object -First 1 + echo "$($ccacheBin.DirectoryName)" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + + - if: ${{ matrix.os == 'windows-2022' }} + name: Configure ccache on Windows + shell: bash + run: | + ccache --set-config=max_size=2G + ccache --set-config=compression=true + ccache -z + echo "CMAKE_C_COMPILER_LAUNCHER=ccache" >> $GITHUB_ENV + echo "CMAKE_CXX_COMPILER_LAUNCHER=ccache" >> $GITHUB_ENV + + - if: ${{ matrix.os == 'windows-2022' }} + name: Get ccache cache (Windows) + uses: actions/cache@v5 + with: + key: ccache-bci-${{ matrix.platform }}-${{ matrix.arch }}-${{ hashFiles(format('{0}/vcpkg.json', inputs.workdir || 'packages/bci-whispercpp')) 
}} + path: ~\AppData\Local\ccache + restore-keys: | + ccache-bci-${{ matrix.platform }}-${{ matrix.arch }}- + + - name: Checkout repository + uses: actions/checkout@v6 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || github.ref }} + token: ${{ secrets.PAT_TOKEN }} + persist-credentials: false + fetch-depth: 0 + + - name: Configure git credentials for private repos + shell: bash + env: + GIT_PAT: ${{ secrets.PAT_TOKEN }} + run: | + GITCFG="${RUNNER_TEMP}/git-global.cfg" + cat > "$GITCFG" <> $GITHUB_ENV + + - name: Setup node + uses: actions/setup-node@v6 + with: + node-version: lts/* + + - name: Configure scoped registry + env: + GPR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + GIT_PAT: ${{ secrets.PAT_TOKEN }} + shell: bash + working-directory: ${{ env.WORKDIR }} + run: | + set -eu + cat > .npmrc <> $GITHUB_ENV + echo "$VCPKG_ROOT" >> $GITHUB_PATH + + - if: ${{ startsWith(matrix.os, 'ubuntu') }} + name: Configure vcpkg (Linux) + run: echo "VCPKG_ROOT=$VCPKG_INSTALLATION_ROOT" >> $GITHUB_ENV + + - if: ${{ matrix.os == 'windows-2022' }} + name: Configure vcpkg (Windows) + run: echo ("VCPKG_ROOT=$env:VCPKG_INSTALLATION_ROOT" -replace '\\', '/') >> $env:GITHUB_ENV + + - if: ${{ matrix.os == 'windows-2022' }} + name: Configure cmake generator (Windows) + run: | + echo "CMAKE_GENERATOR=Visual Studio 17 2022" >> $env:GITHUB_ENV + echo "CMAKE_GENERATOR_PLATFORM=x64" >> $env:GITHUB_ENV + echo "VCPKG_CMAKE_CONFIGURE_OPTIONS=--no-parallel-configure" >> $env:GITHUB_ENV + + - if: ${{ startsWith(matrix.os, 'macos') }} + name: Disable parallel configuration (macOS) + run: echo "VCPKG_CMAKE_CONFIGURE_OPTIONS=--no-parallel-configure" >> $GITHUB_ENV + + # โ”€โ”€ platform build dependencies โ”€โ”€ + + - if: ${{ startsWith(matrix.os, 'ubuntu') }} + name: Update c++ tools (Linux) + run: | + wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc > /dev/null + sudo 
chmod 644 /etc/apt/trusted.gpg.d/apt.llvm.org.asc + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 19 all + + - if: ${{ startsWith(matrix.os, 'ubuntu') }} + name: Install ccache (Linux) + run: sudo apt-get install -y ccache + + - if: ${{ startsWith(matrix.os, 'macos') }} + name: Install ccache (macOS) + run: brew install ccache + + - if: ${{ matrix.os != 'windows-2022' }} + name: Configure ccache + run: | + ccache --set-config=max_size=2G + ccache --set-config=compression=true + ccache -z + echo "CMAKE_C_COMPILER_LAUNCHER=ccache" >> $GITHUB_ENV + echo "CMAKE_CXX_COMPILER_LAUNCHER=ccache" >> $GITHUB_ENV + + - if: ${{ matrix.os != 'windows-2022' }} + name: Get ccache cache + uses: actions/cache@v5 + with: + key: ccache-bci-${{ matrix.platform }}-${{ matrix.arch }}-${{ hashFiles(format('{0}/vcpkg.json', inputs.workdir || 'packages/bci-whispercpp')) }} + path: ~/.cache/ccache + restore-keys: | + ccache-bci-${{ matrix.platform }}-${{ matrix.arch }}- + + - if: ${{ matrix.platform == 'linux' }} + name: Install Linux build deps + run: | + sudo apt-get update + sudo apt-get install -y libxi-dev libxtst-dev libxrandr-dev + sudo apt-get install -y libopenblas-dev liblapack-dev libfftw3-dev + + - if: ${{ startsWith(matrix.os, 'macos') }} + name: Install macOS build deps + run: brew install --quiet openblas lapack fftw + + - if: ${{ matrix.platform == 'android' }} + name: Configure runner for cross compilation - android + run: | + echo "ANDROID_TOOLCHAIN_ROOT=$(echo $ANDROID_NDK_HOME)/toolchains/llvm/prebuilt/linux-x86_64" >> $GITHUB_ENV + echo "ANDROID_NATIVE_API_LEVEL=34" >> $GITHUB_ENV + + # โ”€โ”€ build โ”€โ”€ + + - name: Install npm dependencies + working-directory: ${{ env.WORKDIR }} + run: npm install + + - name: Create vcpkg cache location + working-directory: ${{ env.WORKDIR }} + run: mkdir -p vcpkg/cache + + - name: Get vcpkg cache + uses: actions/cache@v5 + with: + key: vcpkg-bci-v1-${{ matrix.platform }}-${{ matrix.arch }}-${{ hashFiles( + 
format('{0}/vcpkg.json', inputs.workdir || 'packages/bci-whispercpp'), + format('{0}/vcpkg-configuration.json', inputs.workdir || 'packages/bci-whispercpp') + ) }} + path: ${{ env.WORKDIR }}/vcpkg/cache + restore-keys: | + vcpkg-bci-v1-${{ matrix.platform }}-${{ matrix.arch }}- + + - name: Run bare-make generate + shell: bash + working-directory: ${{ env.WORKDIR }} + run: | + WHISPER_FLAGS="-D WHISPER_USE_METAL=${{ (matrix.platform == 'darwin' || matrix.platform == 'ios') && 'ON' || 'OFF' }} -D WHISPER_USE_CUDA=OFF -D WHISPER_USE_OPENVINO=OFF" + bare-make generate --platform ${{ matrix.platform }} --arch ${{ matrix.arch }} ${{ matrix.flags }} $WHISPER_FLAGS + + - name: Run bare-make build + shell: bash + working-directory: ${{ env.WORKDIR }} + run: bare-make build + + - name: Run bare-make install + shell: bash + working-directory: ${{ env.WORKDIR }} + run: bare-make install + + - name: Strip debug symbols + if: ${{ matrix.platform != 'win32' && matrix.platform != 'android' }} + shell: bash + working-directory: ${{ env.WORKDIR }} + run: find prebuilds -name "*.bare" -exec strip {} \; + + - name: Show ccache stats + run: ccache -s + + - uses: actions/upload-artifact@v6 + with: + name: bci-whispercpp-${{ matrix.platform }}-${{ matrix.arch }}${{ matrix.tags }} + path: ${{ env.WORKDIR }}/prebuilds + + run-integration-tests: + needs: prebuild + uses: ./.github/workflows/integration-test-bci-whispercpp.yml + secrets: inherit + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || github.ref }} + workdir: ${{ inputs.workdir || 'packages/bci-whispercpp' }} + + run-mobile-integration-tests: + needs: prebuild + uses: ./.github/workflows/integration-mobile-test-bci-whispercpp.yml + secrets: inherit + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref || github.ref }} + workdir: ${{ inputs.workdir || 'packages/bci-whispercpp' }} diff --git a/packages/bci-whispercpp/.gitignore 
b/packages/bci-whispercpp/.gitignore
new file mode 100644
index 0000000000..33aefedf56
--- /dev/null
+++ b/packages/bci-whispercpp/.gitignore
@@ -0,0 +1,9 @@
+node_modules/
+build/
+prebuilds/
+models/
+package-lock.json
+test/fixtures/*.bin
+.clang-format
+.clang-tidy
+.valgrind.supp
diff --git a/packages/bci-whispercpp/CMakeLists.txt b/packages/bci-whispercpp/CMakeLists.txt
new file mode 100644
index 0000000000..dfb91051d8
--- /dev/null
+++ b/packages/bci-whispercpp/CMakeLists.txt
@@ -0,0 +1,134 @@
+# CMake build for the bci-whispercpp Bare addon and its optional GTest suite.
+cmake_minimum_required(VERSION 3.25)
+
+option(BUILD_TESTING "Build tests" OFF)
+
+# Test builds additionally request the "tests" vcpkg manifest feature.
+if(BUILD_TESTING)
+  list(APPEND VCPKG_MANIFEST_FEATURES "tests")
+endif()
+
+find_package(cmake-bare REQUIRED PATHS node_modules/cmake-bare)
+find_package(cmake-vcpkg REQUIRED PATHS node_modules/cmake-vcpkg)
+
+# Package-local overlay ports are listed ahead of any pre-set value.
+set(VCPKG_OVERLAY_PORTS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg-overlays;${VCPKG_OVERLAY_PORTS}")
+
+project(bci-whispercpp CXX C)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  add_compile_options(-stdlib=libc++)
+  add_link_options(-stdlib=libc++ -static-libstdc++)
+endif()
+
+# REQUIRED reports a missing header at configure time instead of leaving a
+# *-NOTFOUND value in the include path.
+find_path(QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS "qvac-lib-inference-addon-cpp/ModelInterfaces.hpp" REQUIRED)
+find_package(whisper CONFIG REQUIRED)
+
+set(CMAKE_CXX_STANDARD 20)
+# Fail rather than silently fall back to an older C++ standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# Generator expression: multi-config generators (e.g. Visual Studio) leave
+# CMAKE_BUILD_TYPE empty, so a configure-time check would never add _DEBUG.
+add_compile_definitions($<$<CONFIG:Debug>:_DEBUG>)
+
+if(WIN32)
+  add_compile_definitions(NOMINMAX WIN32_LEAN_AND_MEAN NOGDI)
+endif()
+
+# Model sources shared by the addon module and the unit-test library.
+set(CORE_SRCS
+  ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIConfig.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/BCIModel.cpp
+  ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/NeuralProcessor.cpp
+)
+
+add_bare_module(bci-whispercpp EXPORTS)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  target_link_options(${bci-whispercpp}_module PRIVATE -Wl,--exclude-libs,ALL)
+endif()
+
+target_sources(
+  ${bci-whispercpp}
+  PRIVATE
+    ${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp
+    ${PROJECT_SOURCE_DIR}/addon/src/js-interface/JSAdapter.cpp
+    ${CORE_SRCS}
+)
+
+target_include_directories(
+  ${bci-whispercpp}
+  PRIVATE
+    ${PROJECT_SOURCE_DIR}/addon
+    ${PROJECT_SOURCE_DIR}/addon/src
+    ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include
+    ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+)
+
+target_link_libraries(
+  ${bci-whispercpp}
+  PRIVATE
+    whisper::whisper
+)
+
+target_compile_definitions(${bci-whispercpp} PUBLIC JS_LOGGER)
+
+if(WIN32)
+  target_link_libraries(
+    ${bci-whispercpp}
+    PRIVATE
+      msvcrt.lib
+  )
+endif()
+
+if(BUILD_TESTING)
+  enable_testing()
+  find_package(GTest REQUIRED)
+
+  add_library(bci-core STATIC ${CORE_SRCS})
+
+  target_link_libraries(bci-core PRIVATE
+    whisper::whisper
+  )
+
+  target_include_directories(bci-core PRIVATE
+    ${PROJECT_SOURCE_DIR}/addon/
+    ${PROJECT_SOURCE_DIR}/addon/src/
+    ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include
+    ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+  )
+
+  add_executable(
+    test-bci-core
+    ${PROJECT_SOURCE_DIR}/addon/tests/test_core.cpp
+  )
+
+  target_include_directories(test-bci-core PRIVATE
+    ${PROJECT_SOURCE_DIR}/addon/
+    ${PROJECT_SOURCE_DIR}/addon/src/
+    ${PROJECT_SOURCE_DIR}/addon/src/model-interface
+    ${PROJECT_SOURCE_DIR}/addon/src/model-interface/bci/
+    ${PROJECT_SOURCE_DIR}/addon/tests/
+    ${CMAKE_BINARY_DIR}/_bare/node_modules/bare-headers/include
+    ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+  )
+
+  target_link_libraries(test-bci-core PRIVATE
+    bci-core
+    whisper::whisper
+    GTest::gtest_main
+    GTest::gmock
+  )
+
+  set_target_properties(test-bci-core PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/addon/tests
+  )
+
+  # Register the executable with CTest so `ctest` runs the suite.
+  add_test(NAME test-bci-core COMMAND test-bci-core)
+endif()
diff --git a/packages/bci-whispercpp/LICENSE b/packages/bci-whispercpp/LICENSE
new file mode 100644
index
0000000000..7d199ae333 --- /dev/null +++ b/packages/bci-whispercpp/LICENSE @@ -0,0 +1,179 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + +Copyright 2026 Tether Data, S.A. de C.V. diff --git a/packages/bci-whispercpp/NOTICE b/packages/bci-whispercpp/NOTICE new file mode 100644 index 0000000000..3df664bfac --- /dev/null +++ b/packages/bci-whispercpp/NOTICE @@ -0,0 +1,23 @@ +@qvac/bci-whispercpp +Copyright 2026 Tether Data, S.A. de C.V. + +This product includes third-party components under their +respective licenses. @qvac/bci-whispercpp itself is licensed under +Apache-2.0; bundled dependencies are governed by the licenses +listed below. + +========================================================================= +Third-Party Software Licenses +========================================================================= + +--- MIT --- + + whisper.cpp + https://github.com/ggerganov/whisper.cpp + Copyright (c) 2023-2024 Georgi Gerganov + +--- MIT --- + + ggml + https://github.com/ggerganov/ggml + Copyright (c) 2023-2024 Georgi Gerganov diff --git a/packages/bci-whispercpp/README.md b/packages/bci-whispercpp/README.md new file mode 100644 index 0000000000..e19812caf3 --- /dev/null +++ b/packages/bci-whispercpp/README.md @@ -0,0 +1,196 @@ +# @qvac/bci-whispercpp + +Brain-Computer Interface (BCI) neural signal transcription addon for qvac, powered by [whisper.cpp](https://github.com/tetherto/whisper.cpp). + +Transcribes multi-channel neural signals (e.g., 512-channel microelectrode array recordings) into text using a BCI-trained whisper model running natively via GGML. Output matches the Python BrainWhisperer reference model exactly. 
+ +## Architecture + +``` +Neural Signal (512ch, 20ms bins) + │ + ▼ +┌──────────────────────────────┐ +│ NeuralProcessor (C++) │ +│ - Gaussian smoothing │ std=2, kernel=100 +│ - Day-specific projection │ low-rank (A·B) + month + softsign +│ - Pad to 3000 frames │ mel-major layout for whisper.cpp +└──────────────┬───────────────┘ + │ mel features (512 × 3000) + ▼ +┌──────────────────────────────┐ +│ whisper.cpp (patched) │ +│ - conv1 (k=7, 512→384) │ BCI-trained embedder weights +│ - conv2 (k=3, stride=2) │ +│ - Positional encoding │ learned time PE + sinusoidal day PE +│ - 6-layer encoder │ windowed attention (w=57) on layers 0–3 +│ - 4-layer decoder (LoRA) │ beam search, length_penalty=0.14 +└──────────────┬───────────────┘ + │ + ▼ + Text output +``` + +## Results + +Native GGML inference matches the Python BrainWhisperer reference on all test samples: + +| Sample | Ground Truth | GGML Native Output | Python Reference | +|--------|-------------|-------------------|-----------------| +| 0 | "You can see the code at this point as well." | "You can see the good at this point as well." | "you can see the good at this point as well" | +| 1 | "How does it keep the cost down?" | "How does it keep the cost said?" | "how does it keep the cost said" | +| 2 | "Not too controversial." | "Not too controversial." | "not too controversial" | +| 3 | "The jury and a judge work together on it." | "The jury and a judge work together on it." | "the jury and a judge work together on it" | +| 4 | "Were quite vocal about it." | "We're quite vocal about it." 
| "we're quite vocal about it" | + +## Neural Signal Format + +Binary files with the following layout: + +| Offset | Type | Description | +|--------|-----------|------------------------------------------------------| +| 0 | uint32 | Number of timesteps | +| 4 | uint32 | Number of channels | +| 8 | float32[] | Feature data (row-major: `features[t * channels + c]`) | + +Each timestep represents a 20ms bin of neural activity. Channels correspond to individual electrodes in a microelectrode array (typically 512 channels). + +## Installation + +```bash +cd packages/bci-whispercpp +npm install +VCPKG_ROOT=/path/to/vcpkg npm run build +``` + +### Prerequisites + +- **Bare runtime** >= 1.19.0 +- **CMake** >= 3.25 +- **vcpkg** with `VCPKG_ROOT` environment variable set + +### Model Conversion + +Convert a trained BrainWhisperer checkpoint. This produces **two files**, both required for inference: + +| File | Size | Description | +|------|------|-------------| +| `ggml-bci-windowed.bin` | ~84 MB | GGML model: whisper encoder/decoder (LoRA-merged), tokenizer, positional embedding, windowed attention header | +| `bci-embedder.bin` | ~24 MB | Day projection weights: low-rank A·B matrices per recording day, month projections, session-to-day mapping | + +```bash +python3 scripts/convert-model.py \ + --checkpoint /path/to/epoch=93-val_wer=0.0910.ckpt +``` + +Both files are written to `models/` by default. 
All flags are optional: + +| Flag | Default | Description | +|------|---------|-------------| +| `--output` | `models/ggml-bci-windowed.bin` | GGML model output path | +| `--embedder-output` | `models/bci-embedder.bin` | Embedder weights output path | +| `--day-idx` | `1` | Day index for baked positional embedding | +| `--window-size` | `57` | Windowed attention size (0 to disable) | +| `--last-window-layer` | `3` | Last encoder layer with windowed attention | +| `--f32` | off | Use f32 for all tensors (avoids f16 precision loss, ~2x larger) | + +**Important:** Both files must be in the same directory at runtime. The C++ addon looks for `bci-embedder.bin` next to the GGML model file and will fail if it is missing. + +## Usage + +### Low-level API (BCIInterface) + +```javascript +const { BCIInterface } = require('@qvac/bci-whispercpp/bci') +const binding = require('@qvac/bci-whispercpp/binding') + +const config = { + contextParams: { model: '/path/to/ggml-bci.bin' }, + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false }, + bciConfig: { day_idx: 1 } +} + +const onOutput = (addon, event, jobId, data, error) => { + if (event === 'Output') console.log('Segment:', data.text) + if (event === 'JobEnded') console.log('Done:', data) + if (event === 'Error') console.error('Error:', error) +} + +const model = new BCIInterface(binding, config, onOutput) +await model.activate() + +// Batch mode — pass entire signal at once +const neuralData = fs.readFileSync('signal.bin') +await model.runJob({ input: new Uint8Array(neuralData) }) + +// Streaming mode — send chunks then signal end +await model.append({ type: 'neural', input: chunk1 }) +await model.append({ type: 'neural', input: chunk2 }) +await model.append({ type: 'end of job' }) + +await model.destroyInstance() +``` + +## Testing + +### Integration Tests + +```bash +WHISPER_MODEL_PATH=./models/ggml-bci-windowed.bin npm run test:integration +``` + +### C++ Unit Tests + +```bash 
+VCPKG_ROOT=/path/to/vcpkg npm run test:cpp +``` + +## Configuration + +### whisperConfig + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `language` | string | `"en"` | Language code | +| `temperature` | number | `0.0` | Sampling temperature | +| `n_threads` | number | `0` (auto) | Number of threads | + +### bciConfig + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `day_idx` | number | `0` | Session day index for day-specific projection | + +### contextParams + +| Parameter | Type | Description | +|-----------|------|-------------| +| `model` | string | **Required.** Path to BCI GGML model file | +| `use_gpu` | boolean | Enable GPU acceleration | +| `flash_attn` | boolean | Enable flash attention | + +## whisper.cpp Patches + +The package includes a vcpkg overlay with 4 patches applied to whisper.cpp: + +| Patch | Description | +|-------|-------------| +| 0001 | Fix vcpkg build | +| 0002 | Fix Apple Silicon cross-compilation | +| 0003 | Variable conv1 kernel size (read `n_audio_conv1_kernel` from model header) | +| 0004 | Windowed attention mask, window size/layer params in header, BCI-specific SOS tokens | + +## Platform Support + +| Platform | Architecture | Status | +|----------|-------------|--------| +| macOS | arm64 (Apple Silicon) | Tested | +| Linux | x64 | Feasible (same build system as transcription-whispercpp) | +| Windows | x64 | Feasible (whisper.cpp supports MSVC) | +| Android | arm64 | Feasible (NDK toolchain) | +| iOS | arm64 | Feasible (Xcode toolchain) | + +## License + +Apache-2.0 diff --git a/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp b/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp new file mode 100644 index 0000000000..f5d8f7c40d --- /dev/null +++ b/packages/bci-whispercpp/addon/src/addon/AddonJs.hpp @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +#include "model-interface/BCITypes.hpp" +#include "model-interface/bci/BCIModel.hpp" +#include "src/js-interface/JSAdapter.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace js = qvac_lib_inference_addon_cpp::js; +using qvac_lib_inference_addon_cpp::OutputQueue; + +inline void disableWhisperLogs( + enum ggml_log_level, const char*, void*) {} + +inline BCIConfig +createBCIConfig(js_env_t* env, const js::Object& configurationParams) { + JSAdapter adapter; + return adapter.loadFromJSObject(configurationParams, env); +} + +struct JsTranscriptOutputHandler + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler { + JsTranscriptOutputHandler() + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + Transcript>([this](const Transcript& output) -> js_value_t* { + auto jsTranscript = js::Object::create(this->env_); + jsTranscript.setProperty( + this->env_, "text", js::String::create(this->env_, output.text)); + jsTranscript.setProperty( + this->env_, "toAppend", + js::Boolean::create(this->env_, output.toAppend)); + jsTranscript.setProperty( + this->env_, "start", + js::Number::create(this->env_, output.start)); + jsTranscript.setProperty( + this->env_, "end", + js::Number::create(this->env_, output.end)); + jsTranscript.setProperty( + this->env_, "id", + js::Number::create(this->env_, static_cast(output.id))); + return jsTranscript; + }) {} +}; + +struct JsTranscriptArrayOutputHandler + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + std::vector> { + JsTranscriptArrayOutputHandler() + : qvac_lib_inference_addon_cpp::out_handl::JsBaseOutputHandler< + std::vector>( + [this](const std::vector& output) -> js_value_t* { + auto jsOutput = js::Array::create(this->env_); + for (size_t i = 0; i < output.size(); ++i) { + auto jsTranscript = js::Object::create(this->env_); + jsTranscript.setProperty( + this->env_, "text", + js::String::create(this->env_, output[i].text)); + 
jsTranscript.setProperty( + this->env_, "toAppend", + js::Boolean::create(this->env_, output[i].toAppend)); + jsTranscript.setProperty( + this->env_, "start", + js::Number::create(this->env_, output[i].start)); + jsTranscript.setProperty( + this->env_, "end", + js::Number::create(this->env_, output[i].end)); + jsTranscript.setProperty( + this->env_, "id", + js::Number::create( + this->env_, static_cast(output[i].id))); + jsOutput.set(this->env_, i, jsTranscript); + } + return jsOutput; + }) {} +}; + +inline js_value_t* createInstance(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + whisper_log_set(disableWhisperLogs, nullptr); + JsArgsParser args(env, info); + auto configurationParams = args.getJsObject(1, "configurationParams"); + + unique_ptr model = + make_unique(createBCIConfig(env, configurationParams)); + + out_handl::OutputHandlers outputHandlers; + outputHandlers.add(make_shared()); + outputHandlers.add(make_shared()); + unique_ptr callback = make_unique( + env, + args.get(0, "jsHandle"), + args.getFunction(2, "outputCallback"), + std::move(outputHandlers)); + + auto addon = make_unique(env, std::move(callback), std::move(model)); + return JsInterface::createInstance(env, std::move(addon)); +} +JSCATCH + +inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try { + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + JsArgsParser args(env, info); + AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance")); + auto [type, jsInput] = JsInterface::getInput(args); + + if (type != "neural") { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Unknown input type: " + type + " (expected 'neural')"); + } + + vector neuralBytes = + js::TypedArray(env, jsInput).as>(env); + return instance.runJob(std::any(std::move(neuralBytes))); +} +JSCATCH + +inline js_value_t* reload(js_env_t* env, js_callback_info_t* info) try 
{ + using namespace qvac_lib_inference_addon_cpp; + using namespace std; + + JsArgsParser args(env, info); + AddonJs& instance = JsInterface::getInstance(env, args.get(0, "instance")); + auto configurationParams = args.getJsObject(1, "configurationParams"); + BCIConfig config = createBCIConfig(env, configurationParams); + + return js::JsAsyncTask::run( + env, + [addonCpp = instance.addonCpp, config = std::move(config)]() mutable { + auto* bciModel = + dynamic_cast(&addonCpp->model.get()); + if (bciModel == nullptr) { + throw std::runtime_error("Invalid model type for reload"); + } + bciModel->setConfig(config); + }); +} +JSCATCH + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp b/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp new file mode 100644 index 0000000000..5711fb5c53 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/addon/BCIErrors.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +#include "qvac-lib-inference-addon-cpp/Errors.hpp" + +namespace qvac_lib_inference_addon_bci::errors { +constexpr const char* ADDON_ID = "BCI"; +} // namespace qvac_lib_inference_addon_bci::errors + +namespace qvac_errors { +namespace bci_error { +enum class Code : std::uint8_t { + InvalidNeuralSignal, + UnsupportedSignalFormat, + ProcessingFailed, +}; + +inline qvac_errors::StatusError +makeStatus(Code /*code*/, const std::string& message) { + return qvac_errors::StatusError("BCI", "BCIError", message); +} +} // namespace bci_error +} // namespace qvac_errors diff --git a/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp new file mode 100644 index 0000000000..58e60eeb47 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.cpp @@ -0,0 +1,129 @@ +#include "JSAdapter.hpp" + +#include +#include +#include + +#include + +using namespace qvac_lib_inference_addon_cpp::js; + +namespace 
qvac_lib_inference_addon_bci { + +namespace { + +auto getPropertyNames(js_env_t* env, Object object) -> Array { + js_value_t* propertyNames; + JS(js_get_property_names(env, object, &propertyNames)); + return Array::fromValue(propertyNames); +} + +auto getValueType(js_env_t* env, js_value_t* value) -> js_value_type_t { + js_value_type_t valueType; + JS(js_typeof(env, value, &valueType)); + return valueType; +} + +template +void addConfigParam( + std::map& cfg, std::string&& key, T&& value) { + if (auto e = cfg.try_emplace(std::move(key), std::forward(value)); + !e.second) { + std::ostringstream oss; + oss << "key '" << key << "' already exists"; + throw std::runtime_error{oss.str()}; + } +} + +} // namespace + +void JSAdapter::loadMap( + Object jsObject, js_env_t* env, + std::map& output) { + + auto names = getPropertyNames(env, jsObject); + auto namesSize = names.size(env); + for (auto i = 0; i < namesSize; ++i) { + auto key = names.get(env, i); + auto value = jsObject.getProperty(env, key); + switch (getValueType(env, value)) { + case js_boolean: + addConfigParam( + output, + key.as(env), + Boolean::fromValue(value).as(env)); + break; + case js_number: + addConfigParam( + output, + key.as(env), + Number::fromValue(value).as(env)); + break; + case js_string: + addConfigParam( + output, + key.as(env), + String::fromValue(value).as(env)); + break; + case js_object: + continue; + case js_function: + continue; + default: + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + "Invalid type for key: " + key.as(env) + + " is not supported"); + } + } +} + +BCIConfig JSAdapter::loadFromJSObject(Object jsObject, js_env_t* env) { + BCIConfig config; + + auto whisperConfigObj = + jsObject.getOptionalProperty(env, "whisperConfig"); + if (whisperConfigObj.has_value()) { + loadMap(whisperConfigObj.value(), env, config.whisperMainCfg); + } + + auto contextParamsObj = + jsObject.getOptionalProperty(env, "contextParams"); + if 
(contextParamsObj.has_value()) { + loadContextParams(contextParamsObj.value(), env, config); + } + + auto miscConfigObj = + jsObject.getOptionalProperty(env, "miscConfig"); + if (miscConfigObj.has_value()) { + loadMiscParams(miscConfigObj.value(), env, config); + } + + auto bciConfigObj = + jsObject.getOptionalProperty(env, "bciConfig"); + if (bciConfigObj.has_value()) { + loadBCIParams(bciConfigObj.value(), env, config); + } + + return config; +} + +BCIConfig JSAdapter::loadContextParams( + Object contextParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(contextParamsObj, env, config.whisperContextCfg); + return config; +} + +BCIConfig JSAdapter::loadMiscParams( + Object miscParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(miscParamsObj, env, config.miscConfig); + return config; +} + +BCIConfig JSAdapter::loadBCIParams( + Object bciParamsObj, js_env_t* env, BCIConfig& config) { + loadMap(bciParamsObj, env, config.bciConfig); + return config; +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp new file mode 100644 index 0000000000..9b5b18b7c8 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/JSAdapter.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include + +#include + +#include "addon/BCIErrors.hpp" +#include "model-interface/bci/BCIConfig.hpp" +#include "qvac-lib-inference-addon-cpp/Errors.hpp" + +namespace qvac_lib_inference_addon_cpp::js { +class Object; +} + +namespace qvac_lib_inference_addon_bci { + +class JSAdapter { +public: + JSAdapter() = default; + + auto loadFromJSObject( + qvac_lib_inference_addon_cpp::js::Object jsObject, js_env_t* env) + -> BCIConfig; + + auto loadContextParams( + qvac_lib_inference_addon_cpp::js::Object contextParamsObj, js_env_t* env, + BCIConfig& config) + -> BCIConfig; + + auto loadMiscParams( + qvac_lib_inference_addon_cpp::js::Object miscParamsObj, 
js_env_t* env, + BCIConfig& config) + -> BCIConfig; + + auto loadBCIParams( + qvac_lib_inference_addon_cpp::js::Object bciParamsObj, js_env_t* env, + BCIConfig& config) + -> BCIConfig; + +private: + void loadMap( + qvac_lib_inference_addon_cpp::js::Object jsObject, js_env_t* env, + std::map& output); +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/js-interface/binding.cpp b/packages/bci-whispercpp/addon/src/js-interface/binding.cpp new file mode 100644 index 0000000000..3a9a90072c --- /dev/null +++ b/packages/bci-whispercpp/addon/src/js-interface/binding.cpp @@ -0,0 +1,39 @@ +#include + +#include "src/addon/AddonJs.hpp" + +// NOLINTBEGIN(cppcoreguidelines-macro-usage,readability-function-cognitive-complexity,modernize-use-trailing-return-type,readability-identifier-naming) +auto qvac_lib_inference_addon_bci_exports( + js_env_t* env, + js_value_t* exports) + -> js_value_t* { // NOLINT(readability-identifier-naming) + +#define V(name, fn) \ + { \ + js_value_t* val; \ + if (js_create_function(env, name, -1, fn, nullptr, &val) != 0) { \ + return nullptr; \ + } \ + if (js_set_named_property(env, exports, name, val) != 0) { \ + return nullptr; \ + } \ + } + + V("createInstance", qvac_lib_inference_addon_bci::createInstance) + V("runJob", qvac_lib_inference_addon_bci::runJob) + V("reload", qvac_lib_inference_addon_bci::reload) + V("loadWeights", qvac_lib_inference_addon_cpp::JsInterface::loadWeights) + V("activate", qvac_lib_inference_addon_cpp::JsInterface::activate) + V("cancel", qvac_lib_inference_addon_cpp::JsInterface::cancel) + V("destroyInstance", + qvac_lib_inference_addon_cpp::JsInterface::destroyInstance) + V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger) + V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger) +#undef V + + return exports; +} + +BARE_MODULE( + qvac_lib_inference_addon_bci, qvac_lib_inference_addon_bci_exports) +// 
NOLINTEND(cppcoreguidelines-macro-usage,readability-function-cognitive-complexity,modernize-use-trailing-return-type,readability-identifier-naming) diff --git a/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp b/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp new file mode 100644 index 0000000000..900ee86d97 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/BCITypes.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include + +namespace qvac_lib_inference_addon_bci { + +struct Transcript { + std::string text; + bool toAppend; + float start; + float end; + size_t id; + + Transcript() : toAppend{false}, start(-1.0F), end(-1.0F), id{0} {} + + explicit Transcript(std::string_view strView) + : text{strView}, toAppend{false}, start{-1.0F}, end{-1.0F}, id{0} {} +}; + +struct NeuralSignalHeader { + uint32_t numTimesteps; + uint32_t numChannels; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp new file mode 100644 index 0000000000..5a80272db4 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.cpp @@ -0,0 +1,150 @@ +#include "BCIConfig.hpp" + +#include +#include + +namespace qvac_lib_inference_addon_bci { + +std::string convertVariantToString(const JSValueVariant& value) { + return std::visit( + [](const auto& v) -> std::string { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return "null"; + } else if constexpr (std::is_same_v) { + return std::to_string(v); + } else if constexpr (std::is_same_v) { + std::ostringstream oss; + oss << v; + return oss.str(); + } else if constexpr (std::is_same_v) { + return v; + } else if constexpr (std::is_same_v) { + return v ? 
"true" : "false"; + } + return "unknown"; + }, + value); +} + +const HandlersMap& getWhisperMainHandlers() { + static const HandlersMap handlers = { + {"language", + [](whisper_full_params& /*p*/, const JSValueVariant& /*v*/) { + // Language is handled separately in toWhisperFullParams via + // BCIConfig::lang_ to avoid static-local lifetime issues. + }}, + {"n_threads", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* i = std::get_if(&v)) { + p.n_threads = *i; + } + }}, + {"translate", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.translate = *b; + } + }}, + {"no_timestamps", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.no_timestamps = *b; + } + }}, + {"single_segment", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.single_segment = *b; + } + }}, + {"temperature", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* d = std::get_if(&v)) { + p.temperature = static_cast(*d); + } + }}, + {"suppress_nst", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.suppress_nst = *b; + } + }}, + {"duration_ms", + [](whisper_full_params& p, const JSValueVariant& v) { + if (auto* i = std::get_if(&v)) { + p.duration_ms = *i; + } + }}, + }; + return handlers; +} + +const HandlersMap& getWhisperContextHandlers() { + static const HandlersMap handlers = { + {"use_gpu", + [](whisper_context_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.use_gpu = *b; + } + }}, + {"flash_attn", + [](whisper_context_params& p, const JSValueVariant& v) { + if (auto* b = std::get_if(&v)) { + p.flash_attn = *b; + } + }}, + }; + return handlers; +} + +whisper_full_params toWhisperFullParams(BCIConfig& bciConfig) { + whisper_full_params params = whisper_full_default_params( + WHISPER_SAMPLING_BEAM_SEARCH); + + // BCI defaults matching the Python notebook's 
decode settings + params.beam_search.beam_size = 4; + params.suppress_nst = false; + params.suppress_blank = false; + params.temperature = 0.0F; + params.no_timestamps = true; + params.single_segment = true; + params.no_context = true; + params.length_penalty = 0.14F; + params.max_initial_ts = 0; + + const auto& handlers = getWhisperMainHandlers(); + for (const auto& [key, value] : bciConfig.whisperMainCfg) { + auto it = handlers.find(key); + if (it != handlers.end()) { + it->second(params, value); + } + } + + // Set language from config-owned storage so the pointer outlives params + auto langIt = bciConfig.whisperMainCfg.find("language"); + if (langIt != bciConfig.whisperMainCfg.end()) { + if (auto* s = std::get_if(&langIt->second)) { + bciConfig.lang_ = *s; + params.language = bciConfig.lang_.c_str(); + } + } + + return params; +} + +whisper_context_params toWhisperContextParams(const BCIConfig& bciConfig) { + whisper_context_params params = whisper_context_default_params(); + + const auto& handlers = getWhisperContextHandlers(); + for (const auto& [key, value] : bciConfig.whisperContextCfg) { + auto it = handlers.find(key); + if (it != handlers.end()) { + it->second(params, value); + } + } + + return params; +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp new file mode 100644 index 0000000000..df1b0ac75c --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIConfig.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace qvac_lib_inference_addon_bci { + +using JSValueVariant = + std::variant; + +template +using HandlerFunction = std::function; + +template +using HandlersMap = std::unordered_map>; + +struct BCIConfig { + std::map miscConfig; + std::map whisperMainCfg; + std::map whisperContextCfg; + std::map 
// Abort callback handed to whisper.cpp. `userData` is expected to point at
// the std::atomic_bool cancellation flag; a null pointer means "never abort".
static bool shouldAbortWhisper(void* userData) {
  if (userData == nullptr) {
    return false;
  }
  const auto& cancelFlag = *static_cast<const std::atomic_bool*>(userData);
  return cancelFlag.load(std::memory_order_relaxed);
}
+static bool onEncoderBegin( + whisper_context* ctx, whisper_state* state, void* userData) { + auto* cbData = static_cast(userData); + if (cbData == nullptr || cbData->melData == nullptr) { + return true; + } + + int result = whisper_set_mel_with_state( + cbData->ctx, state, + cbData->melData, cbData->melFrames, cbData->melBins); + + if (result != 0) { + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::ERROR, + "whisper_set_mel_with_state failed: " + std::to_string(result)); + return false; + } + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG, + "Injected neural mel features: " + + std::to_string(cbData->melFrames) + " frames x " + + std::to_string(cbData->melBins) + " bins"); + return true; +} + +BCIModel::BCIModel(BCIConfig config) + : cfg_(std::move(config)), neuralProcessor_() {} + +BCIModel::~BCIModel() noexcept { + try { + unload(); + } catch (...) { + is_loaded_ = false; + } +} + +void BCIModel::loadEmbedderIfNeeded() { + if (neuralProcessor_.hasWeights()) { + return; + } + + // Look for embedder weights next to the model file + auto modelPathIt = cfg_.whisperContextCfg.find("model"); + if (modelPathIt == cfg_.whisperContextCfg.end()) { + return; + } + const auto modelPath = std::get(modelPathIt->second); + + // Try: same directory, "bci-embedder.bin" + auto dir = modelPath.substr(0, modelPath.find_last_of('/')); + auto embedderPath = dir + "/bci-embedder.bin"; + + if (neuralProcessor_.loadEmbedderWeights(embedderPath)) { + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loaded BCI embedder weights from: " + embedderPath); + } else { + throw std::runtime_error( + "BCI embedder weights not found at: " + embedderPath + + ". This file is required for neural signal preprocessing. 
" + "Generate it with: python3 scripts/convert-model.py --checkpoint "); + } +} + +void BCIModel::load() { + if (!ctx_) { + whisper_context_params contextParams = toWhisperContextParams(cfg_); + + const auto modelPathIt = cfg_.whisperContextCfg.find("model"); + if (modelPathIt == cfg_.whisperContextCfg.end()) { + throw std::runtime_error("Model path not specified"); + } + const auto modelPath = std::get(modelPathIt->second); + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loading BCI model from: " + modelPath); + ctx_.reset( + whisper_init_from_file_with_params(modelPath.c_str(), contextParams)); + + if (ctx_ == nullptr) { + throw std::runtime_error("Failed to initialize Whisper context for BCI"); + } + + is_loaded_ = true; + + loadEmbedderIfNeeded(); + + if (!is_warmed_up_) { + warmup(); + is_warmed_up_ = true; + } + } +} + +void BCIModel::unload() { + resetContext(); + is_loaded_ = false; +} + +void BCIModel::reload() { + unload(); + load(); +} + +void BCIModel::reset() { + output_.clear(); + totalSamples_ = 0; + totalTokens_ = 0; + totalSegments_ = 0; + processCalls_ = 0; + totalWallMs_ = 0.0; +} + +qvac_lib_inference_addon_cpp::RuntimeStats BCIModel::runtimeStats() const { + qvac_lib_inference_addon_cpp::RuntimeStats stats; + + const double totalTimeSec = totalWallMs_ / 1000.0; + const double tps = totalTimeSec > 0.0 + ? 
(static_cast(totalTokens_) / totalTimeSec) + : 0.0; + + stats.emplace_back("totalTime", totalTimeSec); + stats.emplace_back("tokensPerSecond", tps); + stats.emplace_back("totalTokens", totalTokens_); + stats.emplace_back("totalSegments", totalSegments_); + stats.emplace_back("processCalls", processCalls_); + stats.emplace_back("totalWallMs", totalWallMs_); + return stats; +} + +static void onNewSegment( + [[maybe_unused]] whisper_context* ctx, whisper_state* state, int nNew, + void* userData) { + auto* bci = static_cast(userData); + if (bci == nullptr || state == nullptr) return; + + const int nSegments = whisper_full_n_segments_from_state(state); + if (nNew <= 0 || nSegments <= 0) return; + const int startIndex = std::max(0, nSegments - nNew); + + for (int i = startIndex; i < nSegments; i++) { + Transcript transcript; + const char* text = whisper_full_get_segment_text_from_state(state, i); + transcript.text = text != nullptr ? text : ""; + transcript.start = + static_cast(whisper_full_get_segment_t0_from_state(state, i)) * + K_SEGMENT_TIMESTAMP_SCALE; + transcript.end = + static_cast(whisper_full_get_segment_t1_from_state(state, i)) * + K_SEGMENT_TIMESTAMP_SCALE; + transcript.id = i; + + bci->emitSegment(transcript); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + bci->addTranscription(transcript); + + const int nTokens = whisper_full_n_tokens_from_state(state, i); + bci->recordSegmentStats(nTokens); + } +} + +void BCIModel::warmup() { + if (!ctx_) return; + + std::vector silentAudio(K_WARMUP_SAMPLE_COUNT, 0.0F); + whisper_full_params params = toWhisperFullParams(cfg_); + params.new_segment_callback = nullptr; + params.new_segment_callback_user_data = nullptr; + + whisper_full(ctx_.get(), params, + silentAudio.data(), + static_cast(silentAudio.size())); +} + +void BCIModel::process(const Input& rawNeuralData) { + if (ctx_ == nullptr) load(); + if (ctx_ == nullptr) { + throw std::runtime_error("BCI Whisper context is not initialized"); + } + + if 
(cancelRequested_.load(std::memory_order_relaxed)) { + throw std::runtime_error("Job cancelled"); + } + + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG, + "Processing neural signal (" + + std::to_string(rawNeuralData.size()) + " bytes)"); + + int dayIdx = 0; + auto it = cfg_.bciConfig.find("day_idx"); + if (it != cfg_.bciConfig.end()) { + if (auto* d = std::get_if(&it->second)) { + dayIdx = static_cast(*d); + } else if (auto* i = std::get_if(&it->second)) { + dayIdx = *i; + } + } + + auto melFeatures = neuralProcessor_.processToMel(rawNeuralData, dayIdx); + const int melBins = neuralProcessor_.getMelBins(); + const int melFrames = neuralProcessor_.getMelFrames(); + + processCalls_ += 1; + + if (ctx_ != nullptr) { + whisper_reset_timings(ctx_.get()); + } + + const auto startTime = std::chrono::steady_clock::now(); + + EncoderCallbackData cbData; + cbData.ctx = ctx_.get(); + cbData.melData = melFeatures.data(); + cbData.melFrames = melFrames; + cbData.melBins = melBins; + + whisper_full_params params = toWhisperFullParams(cfg_); + params.new_segment_callback = onNewSegment; + params.new_segment_callback_user_data = this; + params.abort_callback = shouldAbortWhisper; + params.abort_callback_user_data = &cancelRequested_; + params.encoder_begin_callback = onEncoderBegin; + params.encoder_begin_callback_user_data = &cbData; + + std::vector dummyAudio(K_DUMMY_AUDIO_30S, 0.0F); + + int result = whisper_full( + ctx_.get(), params, + dummyAudio.data(), static_cast(dummyAudio.size())); + + const auto endTime = std::chrono::steady_clock::now(); + totalWallMs_ += + std::chrono::duration(endTime - startTime).count(); + + if (result != 0) { + if (cancelRequested_.load(std::memory_order_relaxed)) { + throw std::runtime_error("Job cancelled"); + } + throw std::runtime_error( + "Failed to process neural signal (whisper_full returned " + + std::to_string(result) + ")"); + } +} + +std::any BCIModel::process(const std::any& input) { + AnyInput modelInput; + if (const 
auto* anyInput = std::any_cast(&input)) { + modelInput = *anyInput; + } else if (const auto* inputVector = std::any_cast(&input)) { + modelInput.input = *inputVector; + } else { + throw qvac_errors::StatusError( + qvac_errors::general_error::InvalidArgument, + std::string("Invalid input type for BCIModel::process: ") + + input.type().name()); + } + + const auto previousOutputCallback = on_segment_; + const bool shouldOverrideCallback = + static_cast(modelInput.outputCallback); + if (shouldOverrideCallback) { + on_segment_ = modelInput.outputCallback; + } + + reset(); + cancelRequested_.store(false, std::memory_order_relaxed); + try { + process(modelInput.input); + } catch (...) { + if (shouldOverrideCallback) { + on_segment_ = previousOutputCallback; + } + throw; + } + + if (shouldOverrideCallback) { + on_segment_ = previousOutputCallback; + } + + return output_; +} + +void BCIModel::saveLoadParams(const BCIConfig& config) { + setConfig(config); +} + +void BCIModel::cancel() const { + cancelRequested_.store(true, std::memory_order_relaxed); +} + +bool BCIModel::configContextIsChanged( + const BCIConfig& oldCfg, const BCIConfig& newCfg) { + const std::vector contextKeys = { + "model", "use_gpu", "flash_attn", "gpu_device"}; + return std::ranges::any_of(contextKeys, [&](const std::string& key) { + const auto oldIt = oldCfg.whisperContextCfg.find(key); + const auto newIt = newCfg.whisperContextCfg.find(key); + if (oldIt != oldCfg.whisperContextCfg.end() && + newIt != newCfg.whisperContextCfg.end()) { + return oldIt->second != newIt->second; + } + return (oldIt != oldCfg.whisperContextCfg.end()) != + (newIt != newCfg.whisperContextCfg.end()); + }); +} + +void BCIModel::resetContext() { ctx_.reset(); } + +void BCIModel::setConfig(const BCIConfig& config) { + bool contextChanged = configContextIsChanged(cfg_, config); + cfg_ = config; + if (contextChanged) reload(); +} + +} // namespace qvac_lib_inference_addon_bci diff --git 
a/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp new file mode 100644 index 0000000000..29493e6bb0 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/BCIModel.hpp @@ -0,0 +1,130 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "BCIConfig.hpp" +#include "NeuralProcessor.hpp" +#include "model-interface/BCITypes.hpp" +#include "qvac-lib-inference-addon-cpp/ModelInterfaces.hpp" +#include "qvac-lib-inference-addon-cpp/RuntimeStats.hpp" + +namespace qvac_lib_inference_addon_bci { + +class BCIModel + : public qvac_lib_inference_addon_cpp::model::IModel, + public qvac_lib_inference_addon_cpp::model::IModelCancel, + public qvac_lib_inference_addon_cpp::model::IModelAsyncLoad { +public: + using OutputCallback = std::function; + using ValueType = float; + using Input = std::vector; + using Output = std::vector; + + struct AnyInput { + Input input; + OutputCallback outputCallback = nullptr; + }; + + // Data passed to encoder_begin_callback so it can inject mel features. 
+ struct EncoderCallbackData { + whisper_context* ctx = nullptr; + const float* melData = nullptr; + int melFrames = 0; + int melBins = 0; + }; + + explicit BCIModel(BCIConfig config); + ~BCIModel() noexcept; + + void initializeBackend() {} + void setConfig(const BCIConfig& config); + + auto setOnSegmentCallback(const OutputCallback& callback) -> void { + on_segment_ = callback; + } + auto addTranscription(const Transcript& transcript) -> void { + output_.push_back(transcript); + } + auto hasSegmentCallback() const -> bool { + return static_cast(on_segment_); + } + auto emitSegment(const Transcript& transcript) -> void { + if (on_segment_) { + on_segment_(transcript); + } + } + + std::string getName() const override { return "BCIModel"; } + std::any process(const std::any& input) override; + void cancel() const override; + + void process(const Input& input); + + void load(); + void unload(); + void unloadWeights() { unload(); } + void reload(); + void reset(); + void waitForLoadInitialization() override { load(); } + void setWeightsForFile( + const std::string&, + std::unique_ptr>&&) override {} + void set_weights_for_file( + const std::string&, + const std::span&, bool) {} + bool isLoaded() const { return is_loaded_; } + qvac_lib_inference_addon_cpp::RuntimeStats runtimeStats() const override; + void warmup(); + + void saveLoadParams(const BCIConfig& config); + template + std::enable_if_t, BCIConfig>, void> + saveLoadParams(T&&, Args&&...) 
{} + + void recordSegmentStats(int nTokens) { + totalSegments_ += 1; + if (nTokens > 0) { + totalTokens_ += static_cast(nTokens); + } + } + +private: + static bool configContextIsChanged( + const BCIConfig& oldCfg, const BCIConfig& newCfg); + void resetContext(); + void loadEmbedderIfNeeded(); + + BCIConfig cfg_; + NeuralProcessor neuralProcessor_; + OutputCallback on_segment_; + Output output_; + + struct WhisperContextDeleter { + void operator()(whisper_context* ctx) const noexcept { + if (ctx != nullptr) { + whisper_free(ctx); + } + } + }; + + std::unique_ptr ctx_{nullptr}; + bool is_loaded_ = false; + bool is_warmed_up_ = false; + + int64_t totalSamples_ = 0; + int64_t totalTokens_ = 0; + int64_t totalSegments_ = 0; + int64_t processCalls_ = 0; + double totalWallMs_ = 0.0; + mutable std::atomic_bool cancelRequested_{false}; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp new file mode 100644 index 0000000000..b7e4ee5be8 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.cpp @@ -0,0 +1,241 @@ +#include "NeuralProcessor.hpp" + +#include +#include +#include +#include +#include + +#include "addon/BCIErrors.hpp" +#include "qvac-lib-inference-addon-cpp/Logger.hpp" + +namespace qvac_lib_inference_addon_bci { + +namespace { +constexpr size_t K_HEADER_BYTES = 8; +constexpr uint32_t K_EMBEDDER_MAGIC = 0x42434945; +} // namespace + +NeuralProcessor::NeuralProcessor() = default; + +bool NeuralProcessor::loadEmbedderWeights(const std::string& path) { + std::ifstream f(path, std::ios::binary); + if (!f.is_open()) return false; + + auto readU32 = [&]() -> uint32_t { + uint32_t v = 0; + f.read(reinterpret_cast(&v), sizeof(v)); + return v; + }; + auto readFloats = [&](size_t count) -> std::vector { + std::vector data(count); + f.read(reinterpret_cast(data.data()), + 
static_cast(count * sizeof(float))); + return data; + }; + auto readInts = [&](size_t count) -> std::vector { + std::vector data(count); + f.read(reinterpret_cast(data.data()), + static_cast(count * sizeof(int32_t))); + return data; + }; + + if (readU32() != K_EMBEDDER_MAGIC || readU32() != 1) return false; + + weights_.numFeatures = readU32(); + /*embedDim=*/ readU32(); + /*kernelSize1=*/ readU32(); + /*kernelSize2=*/ readU32(); + /*stride2=*/ readU32(); + weights_.numDays = readU32(); + weights_.numMonths = readU32(); + weights_.r = readU32(); + + // Skip conv1/conv2 weights (handled by GGML model) + uint32_t n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + n = readU32(); readFloats(n); + + n = readU32(); + weights_.sessionToDayMap = readInts(n); + + weights_.dayAs.resize(weights_.numDays); + weights_.dayBs.resize(weights_.numDays); + weights_.dayBiases.resize(weights_.numDays); + for (uint32_t i = 0; i < weights_.numDays; ++i) { + n = readU32(); weights_.dayAs[i] = readFloats(n); + n = readU32(); weights_.dayBs[i] = readFloats(n); + n = readU32(); weights_.dayBiases[i] = readFloats(n); + } + + weights_.monthWeights.resize(weights_.numMonths); + weights_.monthBiases.resize(weights_.numMonths); + for (uint32_t i = 0; i < weights_.numMonths; ++i) { + n = readU32(); weights_.monthWeights[i] = readFloats(n); + n = readU32(); weights_.monthBiases[i] = readFloats(n); + } + + weights_.loaded = true; + QLOG(qvac_lib_inference_addon_cpp::logger::Priority::INFO, + "Loaded day projection weights: " + + std::to_string(weights_.numDays) + " days, r=" + + std::to_string(weights_.r)); + return true; +} + +std::vector NeuralProcessor::gaussianSmooth( + const std::vector& data, + uint32_t numTimesteps, uint32_t numChannels, + float kernelStd, int kernelSize) { + + std::vector kernel(kernelSize); + const int center = kernelSize / 2; + float sum = 0.0F; + for (int i = 0; i < kernelSize; ++i) { + float x = static_cast(i - center); + 
kernel[i] = std::exp(-0.5F * (x * x) / (kernelStd * kernelStd)); + sum += kernel[i]; + } + for (auto& k : kernel) k /= sum; + + int start = 0, end = kernelSize - 1; + while (start < end && kernel[start] < 0.01F) ++start; + while (end > start && kernel[end] < 0.01F) --end; + std::vector trimK(kernel.begin() + start, kernel.begin() + end + 1); + const int halfK = static_cast(trimK.size()) / 2; + + std::vector result(data.size()); + for (uint32_t c = 0; c < numChannels; ++c) { + for (uint32_t t = 0; t < numTimesteps; ++t) { + float val = 0.0F; + for (int k = 0; k < static_cast(trimK.size()); ++k) { + int srcT = static_cast(t) + k - halfK; + if (srcT >= 0 && srcT < static_cast(numTimesteps)) + val += data[srcT * numChannels + c] * trimK[k]; + } + result[t * numChannels + c] = val; + } + } + return result; +} + +std::vector NeuralProcessor::applyDayProjection( + const std::vector& features, + uint32_t numTimesteps, uint32_t numChannels, int dayIdx) const { + + if (!weights_.loaded || weights_.r == 0) return features; + + const uint32_t nf = weights_.numFeatures; + const uint32_t r = weights_.r; + int di = std::clamp(dayIdx, 0, static_cast(weights_.numDays) - 1); + + const auto& dayA = weights_.dayAs[di]; + const auto& dayB = weights_.dayBs[di]; + const auto& dayBias = weights_.dayBiases[di]; + + std::vector dayDelta(nf * nf, 0.0F); + for (uint32_t i = 0; i < nf; ++i) + for (uint32_t j = 0; j < nf; ++j) { + float s = 0.0F; + for (uint32_t k = 0; k < r; ++k) + s += dayA[i * r + k] * dayB[k * nf + j]; + dayDelta[i * nf + j] = s; + } + + int monthIdx = di / 30; + bool hasMonth = (monthIdx < static_cast(weights_.monthWeights.size()) && + !weights_.monthWeights[monthIdx].empty()); + + std::vector W(nf * nf), bias(nf, 0.0F); + for (uint32_t i = 0; i < nf * nf; ++i) { + W[i] = dayDelta[i]; + if (hasMonth) W[i] += weights_.monthWeights[monthIdx][i]; + } + for (uint32_t i = 0; i < nf; ++i) { + bias[i] = dayBias[i]; + if (hasMonth && i < weights_.monthBiases[monthIdx].size()) + 
bias[i] += weights_.monthBiases[monthIdx][i]; + } + + // Python: output[t,k] = softsign(sum_d(features[t,d] * W[d,k]) + bias[k]) + // i.e. output = features @ W + bias (right-multiply by W) + std::vector output(numTimesteps * nf); + for (uint32_t t = 0; t < numTimesteps; ++t) + for (uint32_t k = 0; k < nf; ++k) { + float s = bias[k]; + for (uint32_t d = 0; d < nf; ++d) + s += features[t * numChannels + d] * W[d * nf + k]; + output[t * nf + k] = s / (1.0F + std::abs(s)); + } + + return output; +} + +std::vector NeuralProcessor::processToMel( + const std::vector& rawData, int dayIdx) const { + + if (rawData.size() < K_HEADER_BYTES) { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::InvalidNeuralSignal, + "Neural signal buffer too small"); + } + + uint32_t numTimesteps = 0, numChannels = 0; + std::memcpy(&numTimesteps, rawData.data(), sizeof(uint32_t)); + std::memcpy(&numChannels, rawData.data() + sizeof(uint32_t), sizeof(uint32_t)); + + size_t expectedBytes = static_cast(numTimesteps) * numChannels * sizeof(float); + if (rawData.size() < K_HEADER_BYTES + expectedBytes) { + throw qvac_errors::bci_error::makeStatus( + qvac_errors::bci_error::Code::InvalidNeuralSignal, + "Neural signal buffer truncated"); + } + + std::vector features(numTimesteps * numChannels); + std::memcpy(features.data(), rawData.data() + K_HEADER_BYTES, expectedBytes); + + // Passthrough mode: if dayIdx == -1, skip preprocessing and treat + // the input as pre-computed mel features in frame-major layout. 
+ if (dayIdx == -1) { + const int melBins = K_WHISPER_N_MEL; + const int melFrames = K_WHISPER_MEL_FRAMES; + std::vector melOutput(melFrames * melBins, 0.0F); + uint32_t framesToCopy = std::min(numTimesteps, static_cast(melFrames)); + uint32_t chToCopy = std::min(numChannels, static_cast(melBins)); + for (uint32_t t = 0; t < framesToCopy; ++t) + for (uint32_t c = 0; c < chToCopy; ++c) + melOutput[c * melFrames + t] = features[t * numChannels + c]; + return melOutput; + } + + // Step 1: Gaussian smoothing (std=2.0, kernel_size=100, matching BrainWhisperer) + auto smoothed = gaussianSmooth(features, numTimesteps, numChannels, 2.0F, 100); + + // Step 2: Day projection (if available) + std::vector projected; + uint32_t projChannels = numChannels; + if (weights_.loaded && weights_.r > 0) { + projected = applyDayProjection(smoothed, numTimesteps, numChannels, dayIdx); + projChannels = weights_.numFeatures; + } else { + projected = smoothed; + } + + // Step 3: Pad to 3000 frames at 512 channels for whisper_set_mel() + // whisper.cpp stores mel as mel.data[mel_bin * n_len + frame] (mel-major), + // so we must write in that layout for whisper_set_mel_with_state. 
+ const int melBins = K_WHISPER_N_MEL; + const int melFrames = K_WHISPER_MEL_FRAMES; + std::vector melOutput(melFrames * melBins, 0.0F); + + uint32_t framesToCopy = std::min(numTimesteps, static_cast(melFrames)); + uint32_t chToCopy = std::min(projChannels, static_cast(melBins)); + for (uint32_t t = 0; t < framesToCopy; ++t) + for (uint32_t c = 0; c < chToCopy; ++c) + melOutput[c * melFrames + t] = projected[t * projChannels + c]; + + return melOutput; +} + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp new file mode 100644 index 0000000000..6909248ca4 --- /dev/null +++ b/packages/bci-whispercpp/addon/src/model-interface/bci/NeuralProcessor.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include +#include + +namespace qvac_lib_inference_addon_bci { + +// Preprocesses raw multi-channel neural signals for whisper.cpp. +// +// Pipeline: neural(512ch) โ†’ smooth โ†’ day_proj โ†’ pad to 3000 frames +// Output is 512-dim x 3000 frames, fed to whisper_set_mel(). 
+// whisper.cpp (patched) handles: conv1(512โ†’384,k=7) โ†’ GELU โ†’ conv2 โ†’ GELU +// โ†’ positional_embedding โ†’ 6-layer transformer โ†’ LoRA-merged decoder โ†’ text +class NeuralProcessor { +public: + static constexpr int K_WHISPER_N_MEL = 512; // n_mels in GGML model + static constexpr int K_WHISPER_MEL_FRAMES = 3000; + + struct EmbedderWeights { + bool loaded = false; + uint32_t numFeatures = 512; + uint32_t numDays = 0; + uint32_t numMonths = 0; + uint32_t r = 0; + + std::vector sessionToDayMap; + std::vector> dayAs; + std::vector> dayBs; + std::vector> dayBiases; + std::vector> monthWeights; + std::vector> monthBiases; + }; + + NeuralProcessor(); + + bool loadEmbedderWeights(const std::string& path); + + std::vector processToMel( + const std::vector& rawData, + int dayIdx = 0) const; + + static std::vector gaussianSmooth( + const std::vector& data, + uint32_t numTimesteps, uint32_t numChannels, + float kernelStd = 2.0F, int kernelSize = 100); + + std::vector applyDayProjection( + const std::vector& features, + uint32_t numTimesteps, uint32_t numChannels, + int dayIdx) const; + + bool hasWeights() const { return weights_.loaded; } + int getMelBins() const { return K_WHISPER_N_MEL; } + int getMelFrames() const { return K_WHISPER_MEL_FRAMES; } + +private: + EmbedderWeights weights_; +}; + +} // namespace qvac_lib_inference_addon_bci diff --git a/packages/bci-whispercpp/addon/tests/test_core.cpp b/packages/bci-whispercpp/addon/tests/test_core.cpp new file mode 100644 index 0000000000..1dcf0daf8f --- /dev/null +++ b/packages/bci-whispercpp/addon/tests/test_core.cpp @@ -0,0 +1,102 @@ +#include +#include +#include + +#include + +#include "model-interface/bci/NeuralProcessor.hpp" +#include "model-interface/bci/BCIConfig.hpp" + +using namespace qvac_lib_inference_addon_bci; + +namespace { + +std::vector createTestSignal(uint32_t numTimesteps, uint32_t numChannels) { + const size_t headerSize = 2 * sizeof(uint32_t); + const size_t dataSize = numTimesteps * 
numChannels * sizeof(float); + std::vector buffer(headerSize + dataSize); + + std::memcpy(buffer.data(), &numTimesteps, sizeof(uint32_t)); + std::memcpy(buffer.data() + sizeof(uint32_t), &numChannels, sizeof(uint32_t)); + + auto* data = reinterpret_cast(buffer.data() + headerSize); + for (uint32_t t = 0; t < numTimesteps; ++t) { + for (uint32_t c = 0; c < numChannels; ++c) { + data[t * numChannels + c] = + static_cast(t) / static_cast(numTimesteps) * + std::sin(static_cast(c) * 0.1F); + } + } + return buffer; +} + +} // namespace + +TEST(NeuralProcessor, ProcessToMelProducesCorrectShape) { + NeuralProcessor processor; + auto signal = createTestSignal(100, 512); + auto result = processor.processToMel(signal); + + EXPECT_EQ(result.size(), + static_cast(NeuralProcessor::K_WHISPER_MEL_FRAMES) * + NeuralProcessor::K_WHISPER_N_MEL); +} + +TEST(NeuralProcessor, ProcessToMelRejectsSmallBuffer) { + NeuralProcessor processor; + std::vector tooSmall = {1, 2, 3}; + EXPECT_THROW(processor.processToMel(tooSmall), std::exception); +} + +TEST(NeuralProcessor, GaussianSmoothPreservesSize) { + uint32_t T = 50, C = 8; + std::vector data(T * C, 1.0F); + auto smoothed = NeuralProcessor::gaussianSmooth(data, T, C, 2.0F, 20); + EXPECT_EQ(smoothed.size(), data.size()); +} + +TEST(NeuralProcessor, GaussianSmoothReducesNoise) { + uint32_t T = 100, C = 4; + std::vector data(T * C); + for (uint32_t t = 0; t < T; ++t) + for (uint32_t c = 0; c < C; ++c) + data[t * C + c] = (t % 2 == 0) ? 
1.0F : -1.0F; + + auto smoothed = NeuralProcessor::gaussianSmooth(data, T, C, 2.0F, 20); + + float origVar = 0, smoothVar = 0; + for (size_t i = 0; i < data.size(); ++i) { + origVar += data[i] * data[i]; + smoothVar += smoothed[i] * smoothed[i]; + } + EXPECT_LT(smoothVar, origVar); +} + +TEST(NeuralProcessor, OutputValuesAreFinite) { + NeuralProcessor processor; + auto signal = createTestSignal(50, 512); + auto result = processor.processToMel(signal); + for (const auto& sample : result) { + EXPECT_TRUE(std::isfinite(sample)); + } +} + +TEST(NeuralProcessor, PaddedFramesAreZero) { + NeuralProcessor processor; + auto signal = createTestSignal(50, 512); + auto result = processor.processToMel(signal); + + float lastFrameSum = 0; + int lastFrame = NeuralProcessor::K_WHISPER_MEL_FRAMES - 1; + for (int m = 0; m < NeuralProcessor::K_WHISPER_N_MEL; ++m) { + lastFrameSum += std::abs(result[lastFrame * NeuralProcessor::K_WHISPER_N_MEL + m]); + } + EXPECT_FLOAT_EQ(lastFrameSum, 0.0F); +} + +TEST(BCIConfig, DefaultWhisperFullParamsAreValid) { + BCIConfig config; + config.whisperMainCfg["language"] = std::string("en"); + auto params = toWhisperFullParams(config); + EXPECT_STREQ(params.language, "en"); +} diff --git a/packages/bci-whispercpp/bci.js b/packages/bci-whispercpp/bci.js new file mode 100644 index 0000000000..aecf03e235 --- /dev/null +++ b/packages/bci-whispercpp/bci.js @@ -0,0 +1,300 @@ +'use strict' + +const { QvacErrorAddonBCI, ERR_CODES } = require('./lib/error') +const { checkConfig } = require('./configChecker') + +const state = Object.freeze({ + LOADING: 'loading', + LISTENING: 'listening', + PROCESSING: 'processing', + IDLE: 'idle', + PAUSED: 'paused', + STOPPED: 'stopped' +}) + +const END_OF_INPUT = 'end of job' + +/** + * Low-level interface between the Bare C++ BCI addon and the JS runtime. + * Accepts neural signal data (Uint8Array) instead of audio. 
/**
 * Low-level interface between the Bare C++ BCI addon and the JS runtime.
 * Accepts neural signal data (Uint8Array) instead of audio.
 *
 * Job IDs are allocated on the JS side (`_nextJobId`); `_activeJobId` is the
 * ID attached to every event forwarded to `outputCb`. Events that arrive while
 * no job is active are dropped.
 */
class BCIInterface {
  /**
   * @param {Object} binding - the native binding object
   * @param {Object} configurationParams - configuration for the BCI model
   * @param {Function} outputCb - callback for inference events (Output, JobEnded, Error)
   * @param {Function} [transitionCb] - callback for state changes
   * @throws {Error} if `configurationParams` fails `checkConfig`
   */
  constructor (binding, configurationParams, outputCb, transitionCb = null) {
    this._binding = binding
    this._outputCb = outputCb
    this._transitionCb = transitionCb
    this._nextJobId = 1
    this._activeJobId = null
    this._bufferedSignal = []
    this._state = state.LOADING

    checkConfig(configurationParams)
    this._handle = this._binding.createInstance(
      this,
      configurationParams,
      this._addonOutputCallback.bind(this),
      transitionCb
    )
  }

  /**
   * Record the new state and notify the transition callback, if any.
   * @param {string} newState - one of the `state` values
   */
  _setState (newState) {
    this._state = newState
    if (this._transitionCb) {
      this._transitionCb(this, newState)
    }
  }

  /**
   * Native callback: classifies the raw addon event as 'Error', 'JobEnded' or
   * 'Output' and forwards it to `outputCb` tagged with the active job ID.
   * @param {Object} addon - value passed through by the native side
   * @param {string} event - raw event name from the addon
   * @param {*} data - event payload (segments, stats object, ...)
   * @param {string} [error] - non-empty string when the addon reports an error
   */
  _addonOutputCallback (addon, event, data, error) {
    const isError = typeof error === 'string' && error.length > 0
    // A payload carrying any of these keys is treated as the end-of-job stats.
    const isStats = data && typeof data === 'object' && (
      'totalTime' in data ||
      'audioDurationMs' in data ||
      'totalSamples' in data
    )
    const isTranscriptOutput = (
      (Array.isArray(data) && data.length > 0) ||
      (data && typeof data === 'object' && typeof data.text === 'string')
    )

    let mappedEvent = event
    if (isError || String(event).includes('Error')) {
      mappedEvent = 'Error'
    } else if (isStats || String(event).includes('RuntimeStats')) {
      mappedEvent = 'JobEnded'
    } else if (isTranscriptOutput) {
      mappedEvent = 'Output'
    } else if (Array.isArray(data) && data.length === 0) {
      // BCIModel::process returns an empty vector to avoid duplicate
      // segment emissions; skip forwarding this noop event.
      return
    }

    // Without an active job there is nobody to attribute the event to.
    const jobId = this._activeJobId
    if (jobId === null || jobId === undefined) {
      return
    }

    if (mappedEvent === 'Output') {
      this._setState(state.PROCESSING)
    }

    if (this._outputCb != null) {
      this._outputCb(addon, mappedEvent, jobId, data, isError ? error : null)
    }

    // Both terminal events release the job slot.
    if (mappedEvent === 'Error' || mappedEvent === 'JobEnded') {
      this._activeJobId = null
      this._setState(state.LISTENING)
    }
  }

  /**
   * Bookkeeping after the native side refused (or failed to start) a job.
   * Restores the job that was active before the attempt so its pending events
   * are still delivered; only falls back to LISTENING when nothing was running.
   * @param {number|null} previousJobId - `_activeJobId` before the attempt
   */
  _settleRejectedJob (previousJobId) {
    this._activeJobId = previousJobId
    if (previousJobId === null || previousJobId === undefined) {
      this._setState(state.LISTENING)
    }
  }

  /** Alias for {@link BCIInterface#destroyInstance}. */
  async unload () {
    await this.destroyInstance()
  }

  /**
   * Destroy the current native instance (if any) and create a new one.
   * @param {Object} configurationParams - validated with `checkConfig`
   */
  async load (configurationParams) {
    checkConfig(configurationParams)
    await this.destroyInstance()
    this._handle = this._binding.createInstance(
      this,
      configurationParams,
      this._addonOutputCallback.bind(this),
      this._transitionCb
    )
    this._setState(state.LOADING)
  }

  /**
   * Cancel any running job and apply a new configuration. Uses the binding's
   * in-place `reload` when it exists, otherwise recreates the instance.
   * @param {Object} configurationParams - validated with `checkConfig`
   */
  async reload (configurationParams) {
    checkConfig(configurationParams)
    await this.cancel()

    if (typeof this._binding.reload === 'function') {
      await this._binding.reload(this._handle, configurationParams)
      this._setState(state.LOADING)
      return
    }

    await this.load(configurationParams)
  }

  /**
   * Forward weights data to the native instance.
   * @param {*} weightsData - passed to `binding.loadWeights` unchanged
   * @throws {QvacErrorAddonBCI} FAILED_TO_LOAD_WEIGHTS
   */
  async loadWeights (weightsData) {
    try {
      this._binding.loadWeights(this._handle, weightsData)
    } catch (err) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_LOAD_WEIGHTS,
        adds: err.message,
        cause: err
      })
    }
  }

  /** No-op; always resolves to `true`. */
  async unloadWeights () {
    return true
  }

  /**
   * Activate the native instance and move to LISTENING.
   * @throws {QvacErrorAddonBCI} FAILED_TO_ACTIVATE
   */
  async activate () {
    try {
      this._binding.activate(this._handle)
      this._setState(state.LISTENING)
    } catch (err) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_ACTIVATE,
        adds: err.message,
        cause: err
      })
    }
  }

  /**
   * Cancel inference, drop buffered signal chunks and release the job slot.
   * @param {number} [jobId] - forwarded to `binding.cancel`
   * @throws {QvacErrorAddonBCI} FAILED_TO_CANCEL
   */
  async cancel (jobId) {
    try {
      await this._binding.cancel(this._handle, jobId)
      this._bufferedSignal = []
      this._activeJobId = null
      this._setState(state.LISTENING)
    } catch (err) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_CANCEL,
        adds: err.message,
        cause: err
      })
    }
  }

  /**
   * Appends neural signal data to the processing buffer.
   * Send { type: 'end of job' } to trigger processing.
   * @param {Object} data
   * @param {string} data.type - 'neural' or 'end of job'
   * @param {Uint8Array} [data.input] - binary neural signal data
   * @returns {Promise<number>} ID of the job the data belongs to
   * @throws {QvacErrorAddonBCI} FAILED_TO_APPEND
   */
  async append (data) {
    try {
      if (data?.type === END_OF_INPUT) {
        const currentJobId = this._nextJobId
        const input = this._concatBufferedSignal()

        let accepted = false
        try {
          accepted = this._binding.runJob(this._handle, {
            type: 'neural',
            input
          })
        } catch (err) {
          // _activeJobId was not touched yet, so "previous" is the current one.
          this._settleRejectedJob(this._activeJobId)
          throw err
        }
        if (!accepted) {
          this._settleRejectedJob(this._activeJobId)
          throw new Error('Cannot set new job: a job is already set or being processed')
        }

        this._activeJobId = currentJobId
        this._nextJobId += 1
        this._bufferedSignal = []
        this._setState(state.PROCESSING)
        return currentJobId
      }

      if (data?.type === 'neural') {
        if (!(data.input instanceof Uint8Array)) {
          throw new Error('Neural signal input must be Uint8Array')
        }
        this._bufferedSignal.push(data.input)
        return this._nextJobId
      }

      throw new Error(`Unknown append input type: ${data?.type}`)
    } catch (err) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_APPEND,
        adds: err.message,
        cause: err
      })
    }
  }

  /**
   * Run a single batch job directly with neural signal data.
   * @param {Object} data
   * @param {Uint8Array} data.input - binary neural signal data
   * @returns {Promise<boolean>} whatever the binding returned; falsy means the
   *   job was not accepted
   * @throws {QvacErrorAddonBCI} FAILED_TO_APPEND
   */
  async runJob (data) {
    // Remember the job that is currently running (if any): a rejected attempt
    // must not steal its ID, otherwise its remaining events would be dropped.
    const previousJobId = this._activeJobId
    try {
      // The ID is published before the native call, as in the original flow.
      this._activeJobId = this._nextJobId
      this._nextJobId += 1
      this._setState(state.PROCESSING)
      const accepted = this._binding.runJob(this._handle, {
        type: 'neural',
        input: data.input
      })
      if (!accepted) {
        this._settleRejectedJob(previousJobId)
      }
      return accepted
    } catch (err) {
      this._settleRejectedJob(previousJobId)
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_APPEND,
        adds: err.message,
        cause: err
      })
    }
  }

  /** @returns {Promise<string>} the last state recorded by `_setState` */
  async status () {
    return this._state
  }

  /**
   * Cancel (best effort), destroy the native instance and reset JS-side state.
   * Does nothing when the handle is already `null`.
   * @throws {QvacErrorAddonBCI} FAILED_TO_DESTROY
   */
  async destroyInstance () {
    if (this._handle === null) {
      return
    }
    try {
      try {
        await this._binding.cancel(this._handle)
      } catch {
        // Best effort: a failed cancel must not prevent destruction.
      }
      this._binding.destroyInstance(this._handle)
      this._handle = null
      this._bufferedSignal = []
      this._activeJobId = null
      this._setState(state.IDLE)
    } catch (err) {
      throw new QvacErrorAddonBCI({
        code: ERR_CODES.FAILED_TO_DESTROY,
        adds: err.message,
        cause: err
      })
    }
  }

  /**
   * Merge all buffered chunks into one Uint8Array.
   * A single buffered chunk is returned as-is (no copy).
   * @returns {Uint8Array}
   */
  _concatBufferedSignal () {
    if (this._bufferedSignal.length === 0) {
      return new Uint8Array()
    }
    if (this._bufferedSignal.length === 1) {
      return this._bufferedSignal[0]
    }
    const totalLength = this._bufferedSignal.reduce(
      (sum, chunk) => sum + chunk.byteLength, 0
    )
    const merged = new Uint8Array(totalLength)
    let offset = 0
    for (const chunk of this._bufferedSignal) {
      merged.set(chunk, offset)
      offset += chunk.byteLength
    }
    return merged
  }
}

module.exports = { BCIInterface }
'use strict'

// Sections that must always be present in a configuration object.
const REQUIRED_SECTIONS = ['whisperConfig', 'contextParams', 'miscConfig']

// Allowed parameter names per configuration section. Sections are validated
// in this order, so the first offending section determines the error.
const VALID_PARAMS = Object.freeze({
  whisperConfig: [
    'n_threads',
    'duration_ms',
    'translate',
    'no_timestamps',
    'single_segment',
    'print_special',
    'print_progress',
    'print_realtime',
    'print_timestamps',
    'language',
    'detect_language',
    'suppress_blank',
    'suppress_nst',
    'temperature',
    'greedy_best_of',
    'beam_search_beam_size',
    'seed'
  ],
  contextParams: [
    'model',
    'use_gpu',
    'flash_attn',
    'gpu_device'
  ],
  miscConfig: [
    'caption_enabled'
  ],
  bciConfig: [
    'smooth_kernel_std',
    'smooth_kernel_size',
    'sample_rate',
    'day_idx'
  ]
})

/**
 * Validates BCI addon configuration.
 *
 * `whisperConfig`, `contextParams` and `miscConfig` are mandatory; `bciConfig`
 * is optional. Every key of a present section must be a known parameter name.
 *
 * @param {Object} configObject
 * @returns {void}
 * @throws {Error} if the object is missing, a required section is missing, or
 *   a section contains an unknown parameter
 */
function checkConfig (configObject) {
  // Fail with a validation error instead of a TypeError on property access.
  if (configObject === null || typeof configObject !== 'object') {
    throw new Error('configuration object is required')
  }

  for (const section of REQUIRED_SECTIONS) {
    if (!configObject[section]) {
      throw new Error(`${section} object is required`)
    }
  }

  for (const [section, allowedParams] of Object.entries(VALID_PARAMS)) {
    const sectionConfig = configObject[section]
    // Required sections were checked above, so only bciConfig can be absent.
    if (!sectionConfig) continue

    for (const userParam of Object.keys(sectionConfig)) {
      if (!allowedParams.includes(userParam)) {
        throw new Error(`${userParam} is not a valid parameter for ${section}`)
      }
    }
  }
}

module.exports = { checkConfig }
'use strict'

/**
 * Transcribe neural signal files using the BCI BrainWhisperer model.
 * Uses the native whisper.cpp GGML backend.
 *
 * Usage:
 *   bare examples/transcribe-neural.js <signal_file> [model_path]
 *
 * Or batch mode (all test fixtures):
 *   bare examples/transcribe-neural.js --batch [model_path]
 */

const fs = require('bare-fs')
const path = require('bare-path')
const os = require('bare-os')
const BCIWhispercpp = require('../index')

const DEFAULT_MODEL = (os.hasEnv('WHISPER_MODEL_PATH') ? os.getEnv('WHISPER_MODEL_PATH') : null) ||
  path.join(__dirname, '..', 'models', 'ggml-bci-windowed.bin')

// Signal files start with two little-endian uint32 values: timesteps, channels.
const SIGNAL_HEADER_BYTES = 8

/**
 * Transcribe every sample listed in test/fixtures/manifest.json and print the
 * hypothesis, the expected text and the word error rate for each.
 * @param {BCIWhispercpp} bci - loaded client
 */
async function runBatch (bci) {
  const fixturesDir = path.join(__dirname, '..', 'test', 'fixtures')
  const manifestPath = path.join(fixturesDir, 'manifest.json')
  const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'))

  console.log(`=== BCI Neural Signal Transcription (Batch: ${manifest.samples.length} samples) ===\n`)

  const startTime = Date.now()

  for (const sample of manifest.samples) {
    const samplePath = path.join(fixturesDir, sample.file)
    if (!fs.existsSync(samplePath)) {
      console.log(` [SKIP] ${sample.file} (not found)`)
      continue
    }

    const result = await bci.transcribeFile(samplePath)
    const wer = BCIWhispercpp.computeWER(result.text, sample.expected_text)

    console.log(` [${sample.file}]`)
    console.log(` Got: "${result.text}"`)
    console.log(` Expected: "${sample.expected_text}"`)
    console.log(` WER: ${(wer * 100).toFixed(1)}%\n`)
  }

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)
  console.log(`Time: ${elapsed}s`)
}

/**
 * Transcribe one signal file and print its header info and transcription.
 * @param {BCIWhispercpp} bci - loaded client
 * @param {string} signalPath - path to the binary signal file
 * @returns {Promise<boolean>} false when the file is missing or too short
 */
async function runSingle (bci, signalPath) {
  if (!fs.existsSync(signalPath)) {
    console.error(`Error: Signal file not found: ${signalPath}`)
    return false
  }

  const buf = fs.readFileSync(signalPath)
  // Guard the header read: DataView throws a RangeError on short buffers.
  if (buf.byteLength < SIGNAL_HEADER_BYTES) {
    console.error(`Error: Signal file is too short to contain a header: ${signalPath}`)
    return false
  }
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
  const T = view.getUint32(0, true)
  const C = view.getUint32(4, true)

  console.log('=== BCI Neural Signal Transcription ===')
  console.log(`Signal: ${signalPath}`)
  console.log(`Timesteps: ${T}, Channels: ${C}`)
  console.log(`Duration: ~${(T * 20 / 1000).toFixed(1)}s\n`)

  const startTime = Date.now()
  const result = await bci.transcribeFile(signalPath)
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(2)

  console.log(`Text: "${result.text}"`)
  console.log(`Time: ${elapsed}s`)
  return true
}

/**
 * Entry point: parse arguments, load the model, run single or batch mode and
 * always release the native instance afterwards.
 */
async function main () {
  const args = global.Bare ? global.Bare.argv.slice(2) : process.argv.slice(2)
  const isBatch = args[0] === '--batch'

  if (args.length < 1) {
    console.log('Usage:')
    console.log(' Single: bare examples/transcribe-neural.js <signal_file> [model_path]')
    console.log(' Batch: bare examples/transcribe-neural.js --batch [model_path]')
    return
  }

  // In both modes the optional model path is the second argument.
  const modelPath = args[1] || DEFAULT_MODEL
  if (!fs.existsSync(modelPath)) {
    console.error(`Error: Model file not found: ${modelPath}`)
    console.error('Set WHISPER_MODEL_PATH or pass as second argument.')
    return
  }

  const bci = new BCIWhispercpp({ modelPath }, {
    whisperConfig: { language: 'en', temperature: 0.0 },
    miscConfig: { caption_enabled: false }
  })

  let completed = false
  try {
    await bci.load()
    console.log('Model loaded.\n')

    if (isBatch) {
      await runBatch(bci)
      completed = true
    } else {
      completed = await runSingle(bci, args[0])
    }
  } finally {
    // Release the native instance on every path, including errors.
    await bci.destroy()
  }

  if (completed) {
    console.log('\nDone.')
  }
}

main().catch((err) => {
  console.error('Error:', err.message || err)
})
/** Optional BCI-specific preprocessing parameters (see `bciConfig`). */
declare interface BCIConfig {
  smooth_kernel_std?: number;
  smooth_kernel_size?: number;
  sample_rate?: number;
  day_idx?: number;
}

/** Whisper decoding parameters forwarded to the addon. */
declare interface WhisperConfig {
  language?: string;
  n_threads?: number;
  temperature?: number;
  suppress_nst?: boolean;
  duration_ms?: number;
  translate?: boolean;
  no_timestamps?: boolean;
  single_segment?: boolean;
  [key: string]: unknown;
}

/** Constructor arguments for {@link BCIWhispercpp}. */
declare interface BCIWhispercppArgs {
  /** Path to the whisper GGML model file; must exist on disk. */
  modelPath: string;
  logger?: {
    debug(...args: unknown[]): void;
    info(...args: unknown[]): void;
    warn(...args: unknown[]): void;
    error(...args: unknown[]): void;
  };
}

/** Inference configuration; every section is optional and has defaults. */
declare interface BCIWhispercppConfig {
  whisperConfig?: WhisperConfig;
  bciConfig?: BCIConfig;
  contextParams?: {
    model?: string;
    use_gpu?: boolean;
    flash_attn?: boolean;
    gpu_device?: number;
  };
  miscConfig?: {
    caption_enabled?: boolean;
  };
}

declare interface TranscriptSegment {
  text: string;
  toAppend: boolean;
  start: number;
  end: number;
  id: number;
}

declare interface TranscriptionResult {
  /** Concatenation of all segment texts, trimmed. */
  text: string;
  segments: TranscriptSegment[];
  /** Runtime statistics reported when the job ended, if any. */
  stats: Record<string, unknown> | null;
}

/**
 * BCI neural signal transcription client powered by whisper.cpp.
 */
declare class BCIWhispercpp {
  constructor(args: BCIWhispercppArgs, config?: BCIWhispercppConfig);

  /** Load and activate the model. */
  load(): Promise<void>;

  /** Transcribe a neural signal binary file. */
  transcribeFile(filePath: string): Promise<TranscriptionResult>;

  /** Transcribe neural signal data (batch). */
  transcribe(neuralData: Uint8Array): Promise<TranscriptionResult>;

  /** Transcribe a stream of neural signal chunks. */
  transcribeStream(
    signalStream: AsyncIterable<Uint8Array>
  ): Promise<TranscriptionResult>;

  /** Cancel current inference. */
  cancel(): Promise<void>;

  /** Destroy the instance and release resources. */
  destroy(): Promise<void>;
}

/**
 * Compute Word Error Rate between hypothesis and reference strings.
 * @returns WER as a ratio (0.0 = perfect).
 */
declare function computeWER(hypothesis: string, reference: string): number;

declare namespace BCIWhispercpp {
  export {
    BCIWhispercpp as default,
    BCIWhispercpp,
    BCIConfig,
    WhisperConfig,
    BCIWhispercppArgs,
    BCIWhispercppConfig,
    TranscriptSegment,
    TranscriptionResult,
    computeWER,
  };
}

export = BCIWhispercpp;
/**
 * High-level BCI transcription client powered by whisper.cpp.
 * Accepts neural signal streams and returns text transcriptions.
 *
 * Only one transcription can be in flight at a time; the pending promise is
 * settled from `_outputCallback` when the addon reports JobEnded or Error.
 */
class BCIWhispercpp {
  /**
   * @param {Object} args
   * @param {string} args.modelPath - path to whisper GGML model file
   * @param {Object} [args.logger] - optional logger
   * @param {Object} config - inference configuration
   * @param {Object} config.whisperConfig - whisper decoding params
   * @param {Object} [config.bciConfig] - BCI-specific params
   * @param {Object} [config.contextParams] - whisper context params
   * @throws {Error} if the model file does not exist
   */
  constructor ({ modelPath, logger = null }, config = {}) {
    this._modelPath = modelPath
    this._logger = logger || { debug () {}, info () {}, warn () {}, error () {} }
    this._config = config
    this._addon = null
    this._hasActiveResponse = false
    this._pendingResolve = null
    this._pendingReject = null
    this._segments = []
    this._stats = null

    if (!this._modelPath || !fs.existsSync(this._modelPath)) {
      throw new Error(`Model file doesn't exist: ${this._modelPath}`)
    }
  }

  /**
   * Load and activate the model.
   * Calling it again replaces the previously loaded native instance.
   * @returns {Promise<void>}
   */
  async load () {
    const whisperConfig = {
      language: 'en',
      temperature: 0.0,
      suppress_nst: true,
      n_threads: 0,
      ...(this._config.whisperConfig || {})
    }

    const configurationParams = {
      contextParams: {
        model: this._modelPath,
        ...(this._config.contextParams || {})
      },
      whisperConfig,
      miscConfig: {
        caption_enabled: false,
        ...(this._config.miscConfig || {})
      }
    }

    if (this._config.bciConfig) {
      configurationParams.bciConfig = this._config.bciConfig
    }

    checkConfig(configurationParams)

    // Release a previously loaded instance instead of leaking its handle.
    if (this._addon) {
      await this._addon.destroyInstance()
      this._addon = null
    }

    const binding = require('./binding')
    this._addon = new BCIInterface(
      binding,
      configurationParams,
      this._outputCallback.bind(this),
      this._logger.info.bind(this._logger)
    )

    await this._addon.activate()
    this._logger.info('BCI addon activated')
  }

  /**
   * Transcribe a neural signal from a binary file.
   * Binary format: [uint32 numTimesteps, uint32 numChannels, float32[] data]
   * @param {string} filePath - path to .bin neural signal file
   * @returns {Promise<{text: string, segments: Object[], stats: Object|null}>}
   */
  async transcribeFile (filePath) {
    const data = fs.readFileSync(filePath)
    return this.transcribe(new Uint8Array(data))
  }

  /**
   * Transcribe neural signal data (batch mode).
   * @param {Uint8Array} neuralData - binary neural signal
   * @returns {Promise<{text: string, segments: Object[], stats: Object|null}>}
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED or JOB_ALREADY_RUNNING
   */
  async transcribe (neuralData) {
    this._assertReady()

    return new Promise((resolve, reject) => {
      this._beginJob(resolve, reject)

      this._addon.runJob({ input: neuralData }).then((accepted) => {
        // A refused job never produces JobEnded/Error, so settle it here;
        // otherwise the promise would stay pending and block later calls.
        if (!accepted) {
          this._clearJob()
          reject(new QvacErrorAddonBCI({ code: ERR_CODES.JOB_ALREADY_RUNNING }))
        }
      }, (err) => {
        this._clearJob()
        reject(err)
      })
    })
  }

  /**
   * Streaming transcription: accepts an async iterable of neural signal chunks.
   * Each chunk is appended and processing starts on end-of-stream.
   * @param {AsyncIterable<Uint8Array>} signalStream
   * @returns {Promise<{text: string, segments: Object[], stats: Object|null}>}
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED or JOB_ALREADY_RUNNING
   */
  async transcribeStream (signalStream) {
    this._assertReady()

    const promise = new Promise((resolve, reject) => {
      this._beginJob(resolve, reject)
    })

    try {
      // NOTE(review): the empty first chunk is kept from the original flow;
      // presumably it validates the append path before consuming the stream.
      await this._addon.append({ type: 'neural', input: new Uint8Array() })

      for await (const chunk of signalStream) {
        await this._addon.append({
          type: 'neural',
          input: new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength)
        })
      }

      await this._addon.append({ type: END_OF_INPUT })
    } catch (err) {
      this._clearJob()
      // Drop chunks buffered so far so they cannot be prepended to the next
      // stream. Best effort: the original failure is the error to surface.
      try {
        await this._addon.cancel()
      } catch {}
      throw err
    }

    return promise
  }

  /**
   * Guard shared by the transcribe entry points.
   * @throws {QvacErrorAddonBCI} MODEL_NOT_LOADED when `load()` has not run (or
   *   the instance was destroyed), JOB_ALREADY_RUNNING when a job is pending
   */
  _assertReady () {
    if (!this._addon) {
      throw new QvacErrorAddonBCI({ code: ERR_CODES.MODEL_NOT_LOADED })
    }
    if (this._hasActiveResponse) {
      throw new QvacErrorAddonBCI({ code: ERR_CODES.JOB_ALREADY_RUNNING })
    }
  }

  /** Reset per-job accumulators and remember how to settle the job. */
  _beginJob (resolve, reject) {
    this._segments = []
    this._stats = null
    this._hasActiveResponse = true
    this._pendingResolve = resolve
    this._pendingReject = reject
  }

  /** Forget the pending job without settling its promise. */
  _clearJob () {
    this._hasActiveResponse = false
    this._pendingResolve = null
    this._pendingReject = null
  }

  /**
   * Receives mapped events from BCIInterface: collects 'Output' segments,
   * resolves on 'JobEnded' and rejects on 'Error'.
   */
  _outputCallback (addon, event, jobId, data, error) {
    if (event === 'Output') {
      if (Array.isArray(data)) {
        this._segments.push(...data)
      } else if (data && data.text) {
        this._segments.push(data)
      }
    } else if (event === 'JobEnded') {
      this._stats = data
      const segments = this._segments
      const stats = this._stats
      const resolve = this._pendingResolve
      this._clearJob()
      if (resolve) {
        const text = segments.map(s => s.text).join('').trim()
        resolve({ text, segments, stats })
      }
    } else if (event === 'Error') {
      const reject = this._pendingReject
      this._clearJob()
      if (reject) {
        reject(new Error(error || 'Transcription failed'))
      }
    }
  }

  /**
   * Cancel current inference. The promise of a cancelled job is not settled.
   * @returns {Promise<void>}
   */
  async cancel () {
    if (this._addon?.cancel) {
      await this._addon.cancel()
    }
    this._clearJob()
  }

  /**
   * Cancel any running job and release the native instance. Afterwards the
   * client must be loaded again before it can transcribe.
   * @returns {Promise<void>}
   */
  async destroy () {
    await this.cancel()
    if (this._addon) {
      await this._addon.destroyInstance()
      // Make later calls fail with MODEL_NOT_LOADED instead of using a dead handle.
      this._addon = null
    }
  }
}

module.exports = BCIWhispercpp
module.exports.BCIWhispercpp = BCIWhispercpp
module.exports.computeWER = computeWER
(message) => `Failed to cancel inference, error: ${message}` + }, + [ERR_CODES.FAILED_TO_APPEND]: { + name: 'FAILED_TO_APPEND', + message: (message) => `Failed to append data to processing queue, error: ${message}` + }, + [ERR_CODES.FAILED_TO_GET_STATUS]: { + name: 'FAILED_TO_GET_STATUS', + message: (message) => `Failed to get addon status, error: ${message}` + }, + [ERR_CODES.FAILED_TO_DESTROY]: { + name: 'FAILED_TO_DESTROY', + message: (message) => `Failed to destroy instance, error: ${message}` + }, + [ERR_CODES.FAILED_TO_ACTIVATE]: { + name: 'FAILED_TO_ACTIVATE', + message: (message) => `Failed to activate model, error: ${message}` + }, + [ERR_CODES.FAILED_TO_RESET]: { + name: 'FAILED_TO_RESET', + message: (message) => `Failed to reset model state, error: ${message}` + }, + [ERR_CODES.FAILED_TO_PAUSE]: { + name: 'FAILED_TO_PAUSE', + message: (message) => `Failed to pause inference, error: ${message}` + }, + [ERR_CODES.INVALID_NEURAL_INPUT]: { + name: 'INVALID_NEURAL_INPUT', + message: (message) => `Invalid neural signal input: ${message}` + }, + [ERR_CODES.JOB_ALREADY_RUNNING]: { + name: 'JOB_ALREADY_RUNNING', + message: () => 'Cannot set new job: a job is already set or being processed' + }, + [ERR_CODES.MODEL_NOT_LOADED]: { + name: 'MODEL_NOT_LOADED', + message: () => 'Model is not loaded' + } +}, { + name, + version +}) + +module.exports = { + ERR_CODES, + QvacErrorAddonBCI +} diff --git a/packages/bci-whispercpp/lib/wer.js b/packages/bci-whispercpp/lib/wer.js new file mode 100644 index 0000000000..9a99084c27 --- /dev/null +++ b/packages/bci-whispercpp/lib/wer.js @@ -0,0 +1,40 @@ +'use strict' + +/** + * Compute Word Error Rate between hypothesis and reference. + * Uses Levenshtein distance on word sequences. 
/**
 * Word Error Rate of a hypothesis against a reference transcript.
 *
 * Both strings are lower-cased and split on whitespace; the word-level
 * Levenshtein distance is then divided by the number of reference words.
 * An empty reference yields 0 for an empty hypothesis and 1 otherwise.
 *
 * @param {string} hypothesis
 * @param {string} reference
 * @returns {number} WER as a ratio (0.0 = perfect, 1.0 = 100% errors)
 */
function computeWER (hypothesis, reference) {
  const toWords = (text) => text.toLowerCase().trim().split(/\s+/).filter(Boolean)

  const hypWords = toWords(hypothesis)
  const refWords = toWords(reference)

  if (refWords.length === 0) {
    return hypWords.length === 0 ? 0 : 1
  }

  // previousRow[k] is the distance between the reference prefix handled so
  // far and the first k hypothesis words; it starts as the "empty reference" row.
  let previousRow = Array.from({ length: hypWords.length + 1 }, (_, k) => k)

  refWords.forEach((refWord, refIdx) => {
    // Column 0: every reference word so far is unmatched.
    const currentRow = [refIdx + 1]
    hypWords.forEach((hypWord, hypIdx) => {
      const cost = refWord === hypWord
        ? previousRow[hypIdx]
        : 1 + Math.min(previousRow[hypIdx + 1], currentRow[hypIdx], previousRow[hypIdx])
      currentRow.push(cost)
    })
    previousRow = currentRow
  })

  return previousRow[hypWords.length] / refWords.length
}

module.exports = { computeWER }
test:cpp:run", + "test": "npm run test:integration", + "test:dts": "tsc index.d.ts --noEmit --lib es2018 --esModuleInterop --skipLibCheck" + }, + "files": [ + "binding.js", + "bci.js", + "configChecker.js", + "index.js", + "index.d.ts", + "prebuilds", + "lib", + "LICENSE", + "NOTICE" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/tetherto/qvac.git" + }, + "author": "Tether", + "keywords": [ + "tether", + "addon", + "whisper", + "bci", + "brain-computer-interface", + "neural", + "qvac" + ], + "license": "Apache-2.0", + "bugs": "https://github.com/tetherto/qvac/issues", + "homepage": "https://github.com/tetherto/qvac#readme", + "devDependencies": { + "bare-buffer": "^3.4.2", + "bare-fs": "^4.5.1", + "bare-tty": "^5.0.3", + "brittle": "^3.17.0", + "cmake-bare": "^1.7.5", + "cmake-vcpkg": "^1.1.0", + "fs": "npm:bare-fs", + "os": "npm:bare-os@^3.6.2", + "standard": "^17.1.2", + "tty": "npm:bare-node-tty" + }, + "dependencies": { + "@qvac/error": "^0.1.0", + "@qvac/logging": "^0.1.0", + "bare-path": "^3.0.0", + "bare-stream": "^2.7.0", + "path": "npm:bare-path" + }, + "exports": { + "./package": "./package.json", + ".": { + "types": "./index.d.ts", + "default": "./index.js" + }, + "./binding.js": "./binding.js" + }, + "types": "index.d.ts" +} diff --git a/packages/bci-whispercpp/scripts/convert-model.py b/packages/bci-whispercpp/scripts/convert-model.py new file mode 100644 index 0000000000..0077aababc --- /dev/null +++ b/packages/bci-whispercpp/scripts/convert-model.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +""" +Convert BrainWhisperer checkpoint to GGML model + embedder weights for whisper.cpp. + +Produces two files required for BCI inference: + 1. GGML model (--output): whisper encoder/decoder weights, tokenizer, positional + embedding, windowed attention params in header + 2. 
def merge_lora_weights(state_dict, alpha=16, r=8):
    """Fold LoRA adapter matrices into their base weights.

    Keys containing ``.lora_A.default.weight`` / ``.lora_B.default.weight`` are
    collected per layer; keys containing ``.base_layer.`` are cloned and stored
    with that infix collapsed to ``.``; every other entry is passed through.
    For each layer that has both A and B and whose ``<layer>.weight`` exists in
    the result, ``(B @ A) * (alpha / r)`` is added to that weight.

    Args:
        state_dict: mapping of parameter name to tensor.
        alpha: LoRA alpha used for the scaling factor.
        r: LoRA rank used for the scaling factor.

    Returns:
        dict: new mapping with the LoRA deltas merged and adapter keys removed.
    """
    suffix_a = ".lora_A.default.weight"
    suffix_b = ".lora_B.default.weight"
    scale = alpha / r

    result = {}
    lora_a = {}
    lora_b = {}

    for name, value in state_dict.items():
        if suffix_a in name:
            lora_a[name.replace(suffix_a, "")] = value
            continue
        if suffix_b in name:
            lora_b[name.replace(suffix_b, "")] = value
            continue
        if ".base_layer." in name:
            # Clone so the merge below never aliases the input tensor.
            result[name.replace(".base_layer.", ".")] = value.clone()
        else:
            result[name] = value

    for layer, mat_a in lora_a.items():
        # Incomplete pairs and layers without a base weight are ignored.
        if layer not in lora_b:
            continue
        target = layer + ".weight"
        if target not in result:
            continue
        result[target] = result[target] + (lora_b[layer] @ mat_a) * scale

    return result
def build_positional_embedding(state_dict, d_model=384, day_idx=0, sessions=None):
    """Build the combined positional embedding for whisper.cpp.

    The BCI encoder applies two separate positional encodings:
      1. Learned time positions (embed_positions) -> first half of the dims
      2. Sinusoidal day encoding (PositionalEncoding) -> last half of the dims

    whisper.cpp applies a single encoder.positional_embedding after conv2,
    so both are combined into one (1500, d_model) tensor.

    Args:
        state_dict: merged checkpoint weights; the learned time encoding is
            read from ``model.whisper.model.encoder.embed_positions.weight``
            (zeros are used, with a warning, when the key is missing).
        d_model: width of the combined embedding.
        day_idx: session index; used directly as the day number unless
            ``sessions`` is given.
        sessions: optional session names whose last 10 characters are a
            ``YYYY.MM.DD`` date; when given and ``day_idx`` is in range, the
            day number is the day offset from the earliest session.

    Returns:
        numpy.ndarray: float32 array of shape (1500, d_model).
    """
    half = d_model - d_model // 2  # 192 for the default d_model

    pe = np.zeros((1500, d_model), dtype=np.float32)

    # First half: learned time positional encoding from the trained model
    time_pe_key = "model.whisper.model.encoder.embed_positions.weight"
    if time_pe_key in state_dict:
        time_pe = state_dict[time_pe_key].numpy()  # (1500, 192)
        pe[:, :half] = time_pe
        print(f" Time positional encoding: shape={time_pe.shape}, "
              f"range=[{time_pe.min():.4f}, {time_pe.max():.4f}]")
    else:
        print(" WARNING: embed_positions.weight not found, using zeros for time encoding")

    # Second half: sinusoidal day encoding.
    # day_idx is a session index; with a sessions list it is resolved to the
    # number of days since the first session. At day_number=0 the encoding is
    # [sin(0), cos(0), ...] = [0, 1, 0, 1, ...].
    day_number = day_idx
    if sessions:
        from datetime import datetime
        sorted_sessions = sorted(sessions)
        fmt = "%Y.%m.%d"
        datetimes = [datetime.strptime(s[-10:], fmt) for s in sorted_sessions]
        if day_idx < len(datetimes):
            day_number = (datetimes[day_idx] - datetimes[0]).days

    day_enc = np.zeros(half, dtype=np.float32)
    div_term = np.exp(np.arange(0, half, 2, dtype=np.float32) * (-math.log(10000.0) / half))
    day_enc[0::2] = np.sin(day_number * div_term)
    day_enc[1::2] = np.cos(day_number * div_term)
    # The same day vector is broadcast to every one of the 1500 positions.
    pe[:, -half:] = day_enc
    print(f" Day encoding: day_number={day_number}, "
          f"range=[{day_enc.min():.4f}, {day_enc.max():.4f}]")

    return pe


# Byte encoder/decoder for tokenizer (from whisper.cpp converter)
def bytes_to_unicode():
    """Return the byte -> unicode-character table used by the tokenizer.

    Bytes in the ranges U+0021..U+007E, U+00A1..U+00AC and U+00AE..U+00FF map
    to themselves. Every other byte is mapped, in ascending byte order, to
    consecutive code points starting at U+0100.

    The ranges are written as numeric code points rather than character
    literals so the table cannot be corrupted by a mis-encoded source file
    (``ord()`` raises TypeError when a literal decodes to several characters).

    Returns:
        dict[int, str]: 256 entries, one per byte value.
    """
    byte_values = (
        list(range(0x21, 0x7E + 1))      # '!' .. '~'
        + list(range(0xA1, 0xAC + 1))    # U+00A1 .. U+00AC
        + list(range(0xAE, 0xFF + 1))    # U+00AE .. U+00FF
    )
    code_points = byte_values[:]
    self_mapped = set(byte_values)

    offset = 0
    for b in range(2 ** 8):
        if b not in self_mapped:
            byte_values.append(b)
            code_points.append(2 ** 8 + offset)
            offset += 1

    return dict(zip(byte_values, (chr(cp) for cp in code_points)))
# GGML tensor name mapping (HuggingFace -> whisper.cpp)
CONV_MAP = {
    'self_attn.k_proj': 'attn.key',
    'self_attn.q_proj': 'attn.query',
    'self_attn.v_proj': 'attn.value',
    'self_attn.out_proj': 'attn.out',
    'self_attn_layer_norm': 'attn_ln',
    'encoder_attn.q_proj': 'cross_attn.query',
    'encoder_attn.v_proj': 'cross_attn.value',
    'encoder_attn.out_proj': 'cross_attn.out',
    'encoder_attn_layer_norm': 'cross_attn_ln',
    'fc1': 'mlp.0',
    'fc2': 'mlp.2',
    'final_layer_norm': 'mlp_ln',
}


def rename_key(hf_key):
    """Translate a HuggingFace parameter name into its whisper.cpp GGML name.

    ``<section>.layers.<i>.<module>.<leaf>`` becomes
    ``<section>.blocks.<i>.<mapped module>.<leaf>``, where the module is looked
    up in ``CONV_MAP`` (``encoder_attn.k_proj`` is handled separately) and left
    unchanged when unknown. A few top-level names (layer norm, positional and
    token embeddings) are renamed explicitly; anything else is returned with
    its section prefix untouched. Names without a dot are returned as-is.
    """
    section, *tail = hf_key.split(".")
    if not tail:
        return hf_key

    if tail[0] == "layers":
        layer_idx = tail[1]
        module = ".".join(tail[2:-1])
        leaf = tail[-1]
        if module == "encoder_attn.k_proj":
            target = "cross_attn.key"
        else:
            target = CONV_MAP.get(module, module)
        return f"{section}.blocks.{layer_idx}.{target}.{leaf}"

    # The final layer norm is called ln_post in the encoder and ln in the decoder.
    norm_name = "ln_post" if section == "encoder" else "ln"
    top_level = {
        "layer_norm.bias": f"{section}.{norm_name}.bias",
        "layer_norm.weight": f"{section}.{norm_name}.weight",
        "embed_positions.weight": f"{section}.positional_embedding",
        "embed_tokens.weight": f"{section}.token_embedding.weight",
    }
    remainder = ".".join(tail)
    return top_level.get(remainder, f"{section}.{remainder}")
def export_embedder(state_dict, output_path):
    """Export day projection / embedder weights to a binary file.

    Layout written (all counts as native-order uint32 via ``struct.pack('I')``,
    all weights as float32):

      1. header: magic 0x42434945, version 1, num_features, embed_dim,
         kernel_size1, kernel_size2, stride2 (2), num_days, num_months, rank
      2. conv1 weight, conv1 bias, conv2 weight, conv2 bias, each prefixed
         with its element count
      3. session-to-day map as int32 prefixed with its length, or a single 0
         when the checkpoint has no such map
      4. per day: A, B and bias, each prefixed with its element count
      5. per month: weight and bias, each prefixed with its element count

    The module docstring states that the C++ addon loads this file alongside
    the GGML model to apply the day-specific projection before inference.

    Args:
        state_dict: checkpoint weights holding the ``model.embedders.0.*`` tensors.
        output_path: destination file; parent directories are created.
    """
    root = 'model.embedders.0.'

    def flattened(name):
        return state_dict[root + name].numpy().flatten()

    def numbered_keys(group):
        # Keys look like '<root><group>.<n>'; order them by their numeric suffix.
        group_prefix = f'{root}{group}.'
        return sorted((k for k in state_dict if k.startswith(group_prefix)),
                      key=lambda k: int(k.split('.')[-1]))

    def write_counted(handle, values):
        # Every array is stored as <uint32 element count><raw bytes>.
        handle.write(struct.pack('I', len(values)))
        handle.write(values.tobytes())

    conv_arrays = [flattened('conv1.weight'), flattened('conv1.bias'),
                   flattened('conv2.weight'), flattened('conv2.bias')]

    conv1_shape = state_dict[root + 'conv1.weight'].shape
    embed_dim = int(conv1_shape[0])
    num_features = int(conv1_shape[1])
    kernel_size1 = int(conv1_shape[2])
    kernel_size2 = int(state_dict[root + 'conv2.weight'].shape[2])

    day_groups = [numbered_keys('day_As'), numbered_keys('day_Bs'),
                  numbered_keys('day_biases')]
    month_groups = [numbered_keys('month_weights'), numbered_keys('month_biases')]

    num_days = len(day_groups[0])
    num_months = len(month_groups[0])
    # The low-rank dimension is read from the second axis of the first day_A matrix.
    r = int(state_dict[day_groups[0][0]].shape[1]) if day_groups[0] else 0

    s2d = state_dict.get(root + 'sessions_to_days.session_to_idx_map')

    EMBEDDER_MAGIC = 0x42434945
    header = (
        EMBEDDER_MAGIC,
        1,  # version
        num_features,
        embed_dim,
        kernel_size1,
        kernel_size2,
        2,  # stride2
        num_days,
        num_months,
        r,
    )

    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    with open(output_path, "wb") as f:
        for field in header:
            f.write(struct.pack('I', field))

        for arr in conv_arrays:
            write_counted(f, arr.astype(np.float32))

        if s2d is not None:
            write_counted(f, s2d.numpy().astype(np.int32).flatten())
        else:
            f.write(struct.pack('I', 0))

        for count, groups in ((num_days, day_groups), (num_months, month_groups)):
            for i in range(count):
                for keys in groups:
                    write_counted(
                        f, state_dict[keys[i]].numpy().flatten().astype(np.float32))

    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    print(f" Embedder: {output_path} ({size_mb:.1f} MB)")
    print(f" {num_days} days, {num_months} months, rank={r}, "
          f"features={num_features}")
with windowed attention (default: 3)") + args = parser.parse_args() + + os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True) + + # Load checkpoint. NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects, so only pass trusted .ckpt files. + print(f"Loading checkpoint: {args.checkpoint}") + ckpt = torch.load(args.checkpoint, map_location="cpu", weights_only=False) + state_dict = ckpt["state_dict"] + config = ckpt["hyper_parameters"]["config"] + + # Merge LoRA + print("Merging LoRA weights...") + merged = merge_lora_weights(state_dict, alpha=16, r=8) + + # Build the model state dict for GGML + # We need: encoder (conv1/conv2 from embedder, layers 0-5 from encoder, layer_norm) + # decoder (LoRA-merged layers 0-3, embed_tokens, embed_positions, layer_norm) + # proj_out + + model_sd = {} + + # --- Encoder conv1 from EMBEDDER (k=7, 512->384) — patched whisper.cpp supports this --- + model_sd["encoder.conv1.weight"] = merged["model.embedders.0.conv1.weight"] # (384, 512, 7) + model_sd["encoder.conv1.bias"] = merged["model.embedders.0.conv1.bias"] # (384,) + + # --- Encoder conv2 from EMBEDDER (k=3, stride=2) --- + model_sd["encoder.conv2.weight"] = merged["model.embedders.0.conv2.weight"] # (384, 384, 3) + model_sd["encoder.conv2.bias"] = merged["model.embedders.0.conv2.bias"] # (384,) + + # --- Encoder positional embedding (combined time + day encoding) --- + # Extract sessions list from checkpoint config for day number resolution + sessions = config.get("dataset", {}).get("sessions", None) + if sessions is None: + sessions = config.get("sessions", None) + print("Building combined positional embedding...") + model_sd["encoder.positional_embedding"] = torch.from_numpy( + build_positional_embedding(merged, d_model=384, day_idx=args.day_idx, sessions=sessions)) + + # --- Encoder transformer layers 0-5 --- + for layer_idx in range(6): + prefix_src = f"model.whisper.model.encoder.layers.{layer_idx}." 
+ for key, tensor in merged.items(): + if key.startswith(prefix_src): + suffix = key[len("model.whisper.model.encoder."):] + ggml_name = rename_key(f"encoder.{suffix}") + model_sd[ggml_name] = tensor + + # --- Encoder layer norm --- + model_sd["encoder.ln_post.weight"] = merged["model.whisper.model.encoder.layer_norm.weight"] + model_sd["encoder.ln_post.bias"] = merged["model.whisper.model.encoder.layer_norm.bias"] + + # --- Decoder (LoRA-merged) --- + dec_prefix = "model.whisper.model.decoder." + for key, tensor in merged.items(): + if not key.startswith(dec_prefix): + continue + # Remove PEFT wrapper + clean = key[len("model.whisper.model."):] + clean = clean.replace("decoder.base_model.model.", "decoder.") + ggml_name = rename_key(clean) + model_sd[ggml_name] = tensor + + # --- proj_out --- + if "model.whisper.proj_out.weight" in merged: + # whisper.cpp skips proj_out (uses decoder.token_embedding transposed) + pass + + # Model hyperparameters + d_model = 384 + n_audio_head = 6 + n_audio_layer = 6 + n_text_head = 6 + n_text_layer = 4 + n_mels = 512 # neural signal channels (conv1 k=7 in patched whisper.cpp) + n_conv1_kernel = 7 + n_vocab = 51864 + n_audio_ctx = 1500 + n_text_ctx = 448 + + print(f"\nGGML model: n_mels={n_mels}, encoder_layers={n_audio_layer}, " + f"decoder_layers={n_text_layer}, d_model={d_model}") + print(f"Tensors to write: {len(model_sd)}") + + # Mel filters: must have n_mel rows matching the header n_mels value, + # because whisper_set_mel_with_state validates n_mel == filters.n_mel. 
+ mel_filters = np.zeros((n_mels, 201), dtype=np.float32) + + # Load tokenizer + from transformers import WhisperTokenizer + tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-tiny.en") + tokens_dict = tokenizer.get_vocab() + tokens_sorted = sorted(tokens_dict.items(), key=lambda x: x[1]) + + byte_decoder = {v: k for k, v in bytes_to_unicode().items()} + + # Write GGML file + print(f"\nWriting GGML model to: {args.output}") + with open(args.output, "wb") as fout: + # Magic + fout.write(struct.pack("i", 0x67676d6c)) + + # Header (matches whisper.cpp expected order) + fout.write(struct.pack("i", n_vocab)) + fout.write(struct.pack("i", n_audio_ctx)) + fout.write(struct.pack("i", d_model)) + fout.write(struct.pack("i", n_audio_head)) + fout.write(struct.pack("i", n_audio_layer)) + fout.write(struct.pack("i", n_text_ctx)) + fout.write(struct.pack("i", d_model)) + fout.write(struct.pack("i", n_text_head)) + fout.write(struct.pack("i", n_text_layer)) + fout.write(struct.pack("i", n_mels)) + ftype_global = 0 if args.f32 else 1 + fout.write(struct.pack("i", ftype_global)) # ftype: 0=f32, 1=f16 + fout.write(struct.pack("i", n_conv1_kernel)) # BCI extension + fout.write(struct.pack("i", args.window_size)) # BCI windowed attention + fout.write(struct.pack("i", args.last_window_layer)) + + # Mel filters (n_mels x 201, must match n_mels for whisper_set_mel validation) + fout.write(struct.pack("i", mel_filters.shape[0])) + fout.write(struct.pack("i", mel_filters.shape[1])) + for i in range(mel_filters.shape[0]): + for j in range(mel_filters.shape[1]): + fout.write(struct.pack("f", mel_filters[i][j])) + + # Tokenizer + fout.write(struct.pack("i", len(tokens_sorted))) + for token_str, token_id in tokens_sorted: + try: + text = bytearray([byte_decoder[c] for c in token_str]) + except KeyError: + text = token_str.encode("utf-8") + fout.write(struct.pack("i", len(text))) + fout.write(text) + + # Write tensors + for name, tensor in model_sd.items(): + data = 
tensor.squeeze().numpy() + + # Reshape conv bias from [n] to [n, 1] + if name in ["encoder.conv1.bias", "encoder.conv2.bias"]: + data = data.reshape(data.shape[0], 1) + + n_dims = len(data.shape) + + use_f16 = not args.f32 + ftype = 1 if use_f16 else 0 + if n_dims < 2 or \ + name == "encoder.conv1.bias" or \ + name == "encoder.conv2.bias" or \ + name == "encoder.positional_embedding" or \ + name == "decoder.positional_embedding": + use_f16 = False + ftype = 0 + + if use_f16: + data = data.astype(np.float16) + else: + data = data.astype(np.float32) + + # Tensor header: n_dims, name_len, ftype + name_bytes = name.encode("utf-8") + fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype)) + + # Dims (reversed from numpy, as GGML expects) + for i in range(n_dims): + fout.write(struct.pack("i", data.shape[n_dims - 1 - i])) + + fout.write(name_bytes) + data.tofile(fout) + + print(f" {name}: {data.shape} ({'f16' if ftype == 1 else 'f32'})") + + size_mb = os.path.getsize(args.output) / (1024 * 1024) + print(f" GGML model: {args.output} ({size_mb:.1f} MB)") + + # --- Export embedder weights --- + print(f"\nWriting embedder weights to: {args.embedder_output}") + export_embedder(state_dict, args.embedder_output) + + print(f"\nDone. 
Both files are required for inference:") + print(f" {args.output}") + print(f" {args.embedder_output}") + + +if __name__ == "__main__": + main() diff --git a/packages/bci-whispercpp/scripts/download-models.sh b/packages/bci-whispercpp/scripts/download-models.sh new file mode 100755 index 0000000000..4fc8a19c8f --- /dev/null +++ b/packages/bci-whispercpp/scripts/download-models.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PACKAGE_DIR="$(dirname "$SCRIPT_DIR")" +MODELS_DIR="${PACKAGE_DIR}/models" + +mkdir -p "$MODELS_DIR" + +MODEL_NAME="ggml-tiny.en.bin" +MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/${MODEL_NAME}" +MODEL_PATH="${MODELS_DIR}/${MODEL_NAME}" + +if [ -f "$MODEL_PATH" ]; then + echo "Model already exists: ${MODEL_PATH}" +else + echo "Downloading ${MODEL_NAME}..." + # --fail rejects HTTP error pages; the .part file keeps an interrupted download from passing the -f check above. + curl -fL "$MODEL_URL" -o "${MODEL_PATH}.part" + mv "${MODEL_PATH}.part" "$MODEL_PATH" + echo "Downloaded to: ${MODEL_PATH}" +fi + +echo "Done." 
diff --git a/packages/bci-whispercpp/test/fixtures/manifest.json b/packages/bci-whispercpp/test/fixtures/manifest.json new file mode 100644 index 0000000000..1223a73316 --- /dev/null +++ b/packages/bci-whispercpp/test/fixtures/manifest.json @@ -0,0 +1,54 @@ +{ + "samples": [ + { + "file": "neural_sample_0.bin", + "timesteps": 910, + "channels": 512, + "expected_text": "You can see the code at this point as well.", + "day_idx": 1, + "bci_transcription": "you can see the good at this point as well", + "bci_wer_vs_expected": null, + "bci_wer": 0.1 + }, + { + "file": "neural_sample_1.bin", + "timesteps": 749, + "channels": 512, + "expected_text": "How does it keep the cost down?", + "day_idx": 1, + "bci_transcription": "how does it keep the cost said", + "bci_wer_vs_expected": null, + "bci_wer": 0.1429 + }, + { + "file": "neural_sample_2.bin", + "timesteps": 502, + "channels": 512, + "expected_text": "Not too controversial.", + "day_idx": 1, + "bci_transcription": "not too controversial", + "bci_wer_vs_expected": null, + 
"bci_wer": 0.0 + }, + { + "file": "neural_sample_3.bin", + "timesteps": 962, + "channels": 512, + "expected_text": "The jury and a judge work together on it.", + "day_idx": 1, + "bci_transcription": "the jury and a judge work together on it", + "bci_wer_vs_expected": null, + "bci_wer": 0.0 + }, + { + "file": "neural_sample_4.bin", + "timesteps": 584, + "channels": 512, + "expected_text": "Were quite vocal about it.", + "day_idx": 1, + "bci_transcription": "we're quite vocal about it", + "bci_wer_vs_expected": null, + "bci_wer": 0.2 + } + ] +} diff --git a/packages/bci-whispercpp/test/integration/bci-addon.test.js b/packages/bci-whispercpp/test/integration/bci-addon.test.js new file mode 100644 index 0000000000..c4c3fb33d4 --- /dev/null +++ b/packages/bci-whispercpp/test/integration/bci-addon.test.js @@ -0,0 +1,161 @@ +'use strict' + +const fs = require('bare-fs') +const path = require('bare-path') +const test = require('brittle') +const os = require('bare-os') +const BCIWhispercpp = require('../../index') +const { getTestPaths, computeWER, detectPlatform } = require('./helpers') + +const platform = detectPlatform() +const { manifest, getSamplePath } = getTestPaths() + +const MODEL_PATH = (os.hasEnv('WHISPER_MODEL_PATH') ? 
os.getEnv('WHISPER_MODEL_PATH') : null) || + path.join(__dirname, '..', '..', 'models', 'ggml-tiny.en.bin') + +const hasModel = fs.existsSync(MODEL_PATH) + +test('[BCI] load and destroy via package interface', { skip: !hasModel, timeout: 120000 }, async (t) => { + const bci = new BCIWhispercpp({ modelPath: MODEL_PATH }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + await bci.load() + t.ok(bci, 'BCIWhispercpp should be created and loaded') + + await bci.destroy() + t.pass('BCIWhispercpp destroyed successfully') +}) + +test('[BCI] batch transcription from neural signal file', { skip: !hasModel, timeout: 120000 }, async (t) => { + if (manifest.samples.length === 0) { + t.skip('No neural signal test fixtures found') + return + } + + const sample = manifest.samples[0] + const samplePath = getSamplePath(sample.file) + if (!fs.existsSync(samplePath)) { + t.skip(`Sample file missing: ${samplePath}`) + return + } + + const bci = new BCIWhispercpp({ modelPath: MODEL_PATH }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + try { + await bci.load() + + const result = await bci.transcribeFile(samplePath) + + console.log('\n=== Batch Transcription Result ===') + console.log(`Expected: "${sample.expected_text}"`) + console.log(`Got: "${result.text}"`) + + const wer = computeWER(result.text, sample.expected_text) + console.log(`WER: ${(wer * 100).toFixed(1)}%`) + + t.ok(typeof result.text === 'string', 'Should produce a transcription string') + t.ok(result.segments, 'Should have segments') + t.ok(typeof wer === 'number' && wer >= 0, 'WER should be a non-negative number') + console.log('\nNote: High WER expected - standard whisper model is not BCI-trained.') + console.log('A BCI-trained GGML model is needed for meaningful neural-to-text results.') + } finally { + await bci.destroy() + } +}) + +test('[BCI] streaming transcription from neural signal chunks', { skip: 
!hasModel, timeout: 120000 }, async (t) => { + if (manifest.samples.length === 0) { + t.skip('No neural signal test fixtures found') + return + } + + const sample = manifest.samples[1] || manifest.samples[0] + const samplePath = getSamplePath(sample.file) + if (!fs.existsSync(samplePath)) { + t.skip(`Sample file missing: ${samplePath}`) + return + } + + const bci = new BCIWhispercpp({ modelPath: MODEL_PATH }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + try { + await bci.load() + + const fullData = fs.readFileSync(samplePath) + const chunkSize = Math.ceil(fullData.length / 3) + + async function * generateChunks () { + for (let i = 0; i < fullData.length; i += chunkSize) { + const end = Math.min(i + chunkSize, fullData.length) + yield new Uint8Array(fullData.buffer, fullData.byteOffset + i, end - i) + } + } + + const result = await bci.transcribeStream(generateChunks()) + + console.log('\n=== Streaming Transcription Result ===') + console.log(`Expected: "${sample.expected_text}"`) + console.log(`Got: "${result.text}"`) + + const wer = computeWER(result.text, sample.expected_text) + console.log(`WER: ${(wer * 100).toFixed(1)}%`) + + t.ok(typeof result.text === 'string', 'Streaming should produce transcription') + t.ok(typeof wer === 'number', 'WER should be computable') + } finally { + await bci.destroy() + } +}) + +test('[BCI] WER measurement across all test samples', { skip: !hasModel, timeout: 180000 }, async (t) => { + if (manifest.samples.length === 0) { + t.skip('No neural signal test fixtures found') + return + } + + console.log(`\n=== WER Report (${manifest.samples.length} samples) ===`) + console.log(`Platform: ${platform.label}`) + console.log(`Model: ${MODEL_PATH}\n`) + + const bci = new BCIWhispercpp({ modelPath: MODEL_PATH }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + const results = [] + + try { + await bci.load() + + for (const 
sample of manifest.samples) { + const samplePath = getSamplePath(sample.file) + if (!fs.existsSync(samplePath)) continue + + const result = await bci.transcribeFile(samplePath) + const wer = computeWER(result.text, sample.expected_text) + results.push({ expected: sample.expected_text, got: result.text, wer }) + + console.log(` [${sample.file}]`) + console.log(` Expected: "${sample.expected_text}"`) + console.log(` Got: "${result.text}"`) + console.log(` WER: ${(wer * 100).toFixed(1)}%\n`) + } + } finally { + await bci.destroy() + } + + const avgWER = results.reduce((sum, r) => sum + r.wer, 0) / results.length + console.log(` Average WER: ${(avgWER * 100).toFixed(1)}%`) + console.log(` Samples tested: ${results.length}`) + + t.ok(results.length > 0, 'Should have tested at least one sample') + t.ok(typeof avgWER === 'number', 'Average WER should be computable') +}) diff --git a/packages/bci-whispercpp/test/integration/helpers.js b/packages/bci-whispercpp/test/integration/helpers.js new file mode 100644 index 0000000000..7e2d251343 --- /dev/null +++ b/packages/bci-whispercpp/test/integration/helpers.js @@ -0,0 +1,34 @@ +'use strict' + +const fs = require('bare-fs') +const path = require('bare-path') +const { computeWER } = require('../../lib/wer') + +function getTestPaths () { + const fixturesDir = path.join(__dirname, '..', 'fixtures') + const manifestPath = path.join(fixturesDir, 'manifest.json') + + let manifest = { samples: [] } + if (fs.existsSync(manifestPath)) { + manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')) + } + + return { + fixturesDir, + manifest, + getSamplePath: (filename) => path.join(fixturesDir, filename) + } +} + +function detectPlatform () { + const os = require('bare-os') + const arch = os.arch() + const platform = os.platform() + return { arch, platform, label: `${platform}-${arch}` } +} + +module.exports = { + getTestPaths, + detectPlatform, + computeWER +} diff --git a/packages/bci-whispercpp/test/mobile/integration-runtime.cjs 
b/packages/bci-whispercpp/test/mobile/integration-runtime.cjs new file mode 100644 index 0000000000..8f5205535e --- /dev/null +++ b/packages/bci-whispercpp/test/mobile/integration-runtime.cjs @@ -0,0 +1,3 @@ +'use strict' + +console.log('[bci-integration-runtime] Mobile integration tests initialized') diff --git a/packages/bci-whispercpp/test/mobile/integration.auto.cjs b/packages/bci-whispercpp/test/mobile/integration.auto.cjs new file mode 100644 index 0000000000..053ef379af --- /dev/null +++ b/packages/bci-whispercpp/test/mobile/integration.auto.cjs @@ -0,0 +1,74 @@ +'use strict' +require('./integration-runtime.cjs') + +const BCIWhispercpp = require('@qvac/bci-whispercpp') + +function getAssetPath (filename) { + if (global.assetPaths) { + const key = `../../testAssets/${filename}` + if (global.assetPaths[key]) { + return global.assetPaths[key].replace('file://', '') + } + throw new Error(`Asset not found: ${filename}. Ensure it is in test/mobile/testAssets/`) + } + const path = require('bare-path') + return path.join(__dirname, 'testAssets', filename) +} + +async function runLoadAndDestroyTest (options = {}) { // eslint-disable-line no-unused-vars + const result = { summary: { total: 1, passed: 0, failed: 0 }, output: '' } + try { + const modelPath = getAssetPath('ggml-bci-windowed.bin') + const bci = new BCIWhispercpp({ modelPath }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + await bci.load() + await bci.destroy() + + result.summary.passed = 1 + result.output = 'Model loaded and destroyed successfully' + console.log('[BCI] Load and destroy: PASS') + } catch (err) { + result.summary.failed = 1 + result.output = err.message || String(err) + console.error('[BCI] Load and destroy: FAIL -', result.output) + } + return result +} + +async function runTranscriptionTest (options = {}) { // eslint-disable-line no-unused-vars + const result = { summary: { total: 1, passed: 0, failed: 0 }, output: '' } + try { 
+ const modelPath = getAssetPath('ggml-bci-windowed.bin') + const samplePath = getAssetPath('neural_sample_2.bin') + + const bci = new BCIWhispercpp({ modelPath }, { + whisperConfig: { language: 'en', temperature: 0.0 }, + miscConfig: { caption_enabled: false } + }) + + await bci.load() + const transcription = await bci.transcribeFile(samplePath) + await bci.destroy() + + const text = transcription.text || '' + console.log(`[BCI] Transcription result: "${text}"`) + + if (typeof text === 'string' && text.length > 0) { + result.summary.passed = 1 + result.output = `Transcribed: "${text}"` + console.log('[BCI] Transcription: PASS') + } else { + result.summary.failed = 1 + result.output = 'Empty transcription result' + console.error('[BCI] Transcription: FAIL - empty result') + } + } catch (err) { + result.summary.failed = 1 + result.output = err.message || String(err) + console.error('[BCI] Transcription: FAIL -', result.output) + } + return result +} diff --git a/packages/bci-whispercpp/test/mobile/testAssets/.gitignore b/packages/bci-whispercpp/test/mobile/testAssets/.gitignore new file mode 100644 index 0000000000..a8a0dcec44 --- /dev/null +++ b/packages/bci-whispercpp/test/mobile/testAssets/.gitignore @@ -0,0 +1 @@ +*.bin diff --git a/packages/bci-whispercpp/vcpkg-configuration.json b/packages/bci-whispercpp/vcpkg-configuration.json new file mode 100644 index 0000000000..cf90bf82c2 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-configuration.json @@ -0,0 +1,17 @@ +{ + "default-registry": { + "kind": "git", + "baseline": "87ef7179f70122d0cc65a5991b88c20cab59b1e1", + "repository": "git@github.com:tetherto/qvac-registry-vcpkg.git" + }, + "registries": [ + { + "kind": "git", + "baseline": "16c71a39e5a0fc0bdb3fad03beef8f38ee00ee3b", + "repository": "https://github.com/microsoft/vcpkg", + "packages": [ + "gtest" + ] + } + ] +} diff --git a/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/portfile.cmake 
b/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/portfile.cmake new file mode 100644 index 0000000000..ff8c032cac --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/portfile.cmake @@ -0,0 +1,7 @@ +file(WRITE "${CURRENT_PACKAGES_DIR}/share/${PORT}/.clang-format" "") +file(WRITE "${CURRENT_PACKAGES_DIR}/share/${PORT}/.clang-tidy" "") +file(WRITE "${CURRENT_PACKAGES_DIR}/share/${PORT}/.valgrind.supp" "") +file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}/hooks") +file(WRITE "${CURRENT_PACKAGES_DIR}/tools/${PORT}/hooks/pre-commit" "#!/bin/sh\nexit 0\n") +file(WRITE "${CURRENT_PACKAGES_DIR}/share/${PORT}/copyright" "Stub overlay port") + diff --git a/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/vcpkg.json b/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/vcpkg.json new file mode 100644 index 0000000000..0a180e7609 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/qvac-lint-cpp/vcpkg.json @@ -0,0 +1,5 @@ +{ + "name": "qvac-lint-cpp", + "version-string": "1.4.1", + "description": "No-op overlay — linting headers not needed for runtime builds" +} diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0001-fix-vcpkg-build.patch b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0001-fix-vcpkg-build.patch new file mode 100644 index 0000000000..e587ea07d4 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0001-fix-vcpkg-build.patch @@ -0,0 +1,277 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 36eef350..dfcc171d 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -23,10 +23,18 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(WHISPER_STANDALONE ON) + +- include(git-vars) ++ find_package(Git QUIET) ++ if(GIT_FOUND) ++ include(git-vars) ++ else() ++ set(GIT_SHA1 "unknown") ++ set(GIT_DATE "unknown") ++ set(GIT_COMMIT_SUBJECT "unknown") ++ endif() + +- # configure project version +- 
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY) ++ if(EXISTS ${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json) ++ configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY) ++ endif() + else() + set(WHISPER_STANDALONE OFF) + endif() +@@ -169,23 +177,34 @@ set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER}) + set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT}) + set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION}) + +-set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") ++set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}/whisper CACHE PATH "Location of header files") + set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") + set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") + + get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS) + + set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h) +-install(TARGETS whisper LIBRARY PUBLIC_HEADER) ++ ++install( ++ TARGETS whisper ++ EXPORT whisper-targets ++ PUBLIC_HEADER ++ DESTINATION ${WHISPER_INCLUDE_INSTALL_DIR}) ++ ++install( ++ EXPORT whisper-targets ++ FILE whisper-targets.cmake ++ NAMESPACE whisper:: ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/whisper) ++ ++install( ++ FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/whisper) + + configure_package_config_file( +- ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in +- ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake +- INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper +- PATH_VARS +- WHISPER_INCLUDE_INSTALL_DIR +- WHISPER_LIB_INSTALL_DIR +- WHISPER_BIN_INSTALL_DIR ) ++ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in ++ 
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake ++ INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/whisper) + + write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake +@@ -194,7 +213,7 @@ write_basic_package_version_file( + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake +- DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper) ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/whisper) + + configure_file(cmake/whisper.pc.in + "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc" +diff --git a/cmake/git-vars.cmake b/cmake/git-vars.cmake +index 1a4c24eb..8dc51859 100644 +--- a/cmake/git-vars.cmake ++++ b/cmake/git-vars.cmake +@@ -1,22 +1,36 @@ + find_package(Git) + +-# the commit's SHA1 +-execute_process(COMMAND +- "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 +- WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" +- OUTPUT_VARIABLE GIT_SHA1 +- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ++if(GIT_FOUND) ++ execute_process(COMMAND ++ "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 ++ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" ++ OUTPUT_VARIABLE GIT_SHA1 ++ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ++ RESULT_VARIABLE GIT_SHA1_RESULT) + +-# the date of the commit +-execute_process(COMMAND +- "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local +- WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" +- OUTPUT_VARIABLE GIT_DATE +- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ++ execute_process(COMMAND ++ "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local ++ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" ++ OUTPUT_VARIABLE GIT_DATE ++ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ++ RESULT_VARIABLE GIT_DATE_RESULT) + +-# the subject of the commit +-execute_process(COMMAND +- "${GIT_EXECUTABLE}" log -1 --format=%s +- WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" +- OUTPUT_VARIABLE GIT_COMMIT_SUBJECT +- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) ++ execute_process(COMMAND ++ 
"${GIT_EXECUTABLE}" log -1 --format=%s ++ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" ++ OUTPUT_VARIABLE GIT_COMMIT_SUBJECT ++ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ++ RESULT_VARIABLE GIT_COMMIT_SUBJECT_RESULT) ++endif() ++ ++if(NOT GIT_FOUND OR GIT_SHA1_RESULT OR NOT GIT_SHA1) ++ set(GIT_SHA1 "unknown") ++endif() ++ ++if(NOT GIT_FOUND OR GIT_DATE_RESULT OR NOT GIT_DATE) ++ set(GIT_DATE "unknown") ++endif() ++ ++if(NOT GIT_FOUND OR GIT_COMMIT_SUBJECT_RESULT OR NOT GIT_COMMIT_SUBJECT) ++ set(GIT_COMMIT_SUBJECT "unknown") ++endif() +diff --git a/cmake/whisper-config.cmake.in b/cmake/whisper-config.cmake.in +index 6a3fa227..9fe65884 100644 +--- a/cmake/whisper-config.cmake.in ++++ b/cmake/whisper-config.cmake.in +@@ -11,24 +11,21 @@ set(GGML_ACCELERATE @GGML_ACCELERATE@) + + @PACKAGE_INIT@ + +-set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@") +-set_and_check(WHISPER_LIB_DIR "@PACKAGE_WHISPER_LIB_INSTALL_DIR@") +-set_and_check(WHISPER_BIN_DIR "@PACKAGE_WHISPER_BIN_INSTALL_DIR@") ++include(CMakeFindDependencyMacro) + + # Ensure transient dependencies satisfied +- +-find_package(Threads REQUIRED) ++find_dependency(Threads REQUIRED) + + if (APPLE AND GGML_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) + endif() + + if (GGML_BLAS) +- find_package(BLAS REQUIRED) ++ find_dependency(BLAS REQUIRED) + endif() + + if (GGML_CUDA) +- find_package(CUDAToolkit REQUIRED) ++ find_dependency(CUDAToolkit REQUIRED) + endif() + + if (GGML_METAL) +@@ -38,28 +35,13 @@ if (GGML_METAL) + endif() + + if (GGML_HIPBLAS) +- find_package(hip REQUIRED) +- find_package(hipblas REQUIRED) +- find_package(rocblas REQUIRED) ++ find_dependency(hip REQUIRED) ++ find_dependency(hipblas REQUIRED) ++ find_dependency(rocblas REQUIRED) + endif() + +-find_library(whisper_LIBRARY whisper +- REQUIRED +- HINTS ${WHISPER_LIB_DIR}) +- +-set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@") +-set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@") +- 
+-add_library(whisper UNKNOWN IMPORTED) ++find_dependency(ggml CONFIG REQUIRED) + +-set_target_properties(whisper +- PROPERTIES +- INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}" +- INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}" +- INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}" +- IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" +- IMPORTED_LOCATION "${whisper_LIBRARY}" +- INTERFACE_COMPILE_FEATURES cxx_std_11 +- POSITION_INDEPENDENT_CODE ON ) ++include("${CMAKE_CURRENT_LIST_DIR}/whisper-targets.cmake") + + check_required_components(whisper) +diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt +index 4e7399f9..fd3ccebe 100644 +--- a/ggml/CMakeLists.txt ++++ b/ggml/CMakeLists.txt +@@ -277,8 +277,17 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") + #if (GGML_METAL) + # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") + #endif() +-install(TARGETS ggml LIBRARY PUBLIC_HEADER) +-install(TARGETS ggml-base LIBRARY) ++install( ++ TARGETS ggml ggml-base ++ EXPORT ggml-targets ++ PUBLIC_HEADER ++ DESTINATION ${GGML_INCLUDE_INSTALL_DIR}) ++ ++install( ++ EXPORT ggml-targets ++ FILE ggml-targets.cmake ++ NAMESPACE ggml:: ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/ggml) + + if (GGML_STANDALONE) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in +@@ -349,7 +358,7 @@ set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of + configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake +- INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml ++ INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/ggml + PATH_VARS GGML_INCLUDE_INSTALL_DIR + GGML_LIB_INSTALL_DIR + GGML_BIN_INSTALL_DIR) +@@ -361,7 +370,7 @@ write_basic_package_version_file( + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake +- DESTINATION 
${CMAKE_INSTALL_LIBDIR}/cmake/ggml) ++ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/ggml) + + if (MSVC) + set(MSVC_WARNING_FLAGS +diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt +index 9cb2c228..6396d883 100644 +--- a/ggml/src/CMakeLists.txt ++++ b/ggml/src/CMakeLists.txt +@@ -231,7 +231,7 @@ function(ggml_add_backend_library backend) + else() + add_library(${backend} ${ARGN}) + target_link_libraries(ggml PUBLIC ${backend}) +- install(TARGETS ${backend} LIBRARY) ++ install(TARGETS ${backend} EXPORT ggml-targets) + endif() + + target_link_libraries(${backend} PRIVATE ggml-base) +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 2eae0c66..cd4c60e8 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -114,7 +114,11 @@ set_target_properties(whisper PROPERTIES + SOVERSION ${SOVERSION} + ) + +-target_include_directories(whisper PUBLIC . ../include) ++target_include_directories( ++ whisper ++ PUBLIC ++ $ ++ $) + target_compile_features (whisper PUBLIC cxx_std_11) # don't bump + + if (CMAKE_CXX_BYTE_ORDER STREQUAL "BIG_ENDIAN") diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0002-fix-apple-silicon-cross-compile.patch b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0002-fix-apple-silicon-cross-compile.patch new file mode 100644 index 0000000000..f8154f1f92 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0002-fix-apple-silicon-cross-compile.patch @@ -0,0 +1,15 @@ +diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt +index fd3cceb..d072fe6 100644 +--- a/ggml/CMakeLists.txt ++++ b/ggml/CMakeLists.txt +@@ -58,7 +58,9 @@ else() + set(GGML_BLAS_VENDOR_DEFAULT "Generic") + endif() + +-if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH}) ++if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH} OR ++ (APPLE AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64" AND ++ CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")) + message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF") + set(GGML_NATIVE_DEFAULT OFF) + else() 
diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0003-bci-variable-conv1-kernel.patch b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0003-bci-variable-conv1-kernel.patch new file mode 100644 index 0000000000..025f8c29c0 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0003-bci-variable-conv1-kernel.patch @@ -0,0 +1,28 @@ +diff --git a/src/whisper.cpp b/src/whisper.cpp +--- a/src/whisper.cpp ++++ b/src/whisper.cpp +@@ -633,6 +633,7 @@ + int32_t n_mels = 80; + int32_t ftype = 1; + float eps = 1e-5f; ++ int32_t n_audio_conv1_kernel = 3; + }; + + // audio encoding layer +@@ -1535,6 +1536,7 @@ + read_safe(loader, hparams.n_text_layer); + read_safe(loader, hparams.n_mels); + read_safe(loader, hparams.ftype); ++ read_safe(loader, hparams.n_audio_conv1_kernel); + + assert(hparams.n_text_state == hparams.n_audio_state); + +@@ -1775,7 +1777,7 @@ + // encoder + model.e_pe = create_tensor(ASR_TENSOR_ENC_POS_EMBD, ASR_SYSTEM_ENCODER, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_audio_state, n_audio_ctx)); + +- model.e_conv_1_w = create_tensor(ASR_TENSOR_CONV1_WEIGHT, ASR_SYSTEM_ENCODER, ggml_new_tensor_3d(ctx, vtype, 3, n_mels, n_audio_state)); ++ model.e_conv_1_w = create_tensor(ASR_TENSOR_CONV1_WEIGHT, ASR_SYSTEM_ENCODER, ggml_new_tensor_3d(ctx, vtype, hparams.n_audio_conv1_kernel, n_mels, n_audio_state)); + model.e_conv_1_b = create_tensor(ASR_TENSOR_CONV1_BIAS, ASR_SYSTEM_ENCODER, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, n_audio_state)); + + model.e_conv_2_w = create_tensor(ASR_TENSOR_CONV2_WEIGHT, ASR_SYSTEM_ENCODER, ggml_new_tensor_3d(ctx, vtype, 3, n_audio_state, n_audio_state)); diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0004-bci-windowed-attention.patch b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0004-bci-windowed-attention.patch new file mode 100644 index 0000000000..9161158071 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/0004-bci-windowed-attention.patch @@ -0,0 +1,97 @@ 
+diff --git a/src/whisper.cpp b/src/whisper.cpp +--- a/src/whisper.cpp ++++ b/src/whisper.cpp +@@ -633,6 +633,8 @@ + int32_t ftype = 1; + float eps = 1e-5f; + int32_t n_audio_conv1_kernel = 3; ++ int32_t n_audio_window_size = 0; ++ int32_t n_audio_last_window_layer = -1; + }; + + // audio encoding layer +@@ -1536,6 +1538,8 @@ + read_safe(loader, hparams.n_mels); + read_safe(loader, hparams.ftype); + read_safe(loader, hparams.n_audio_conv1_kernel); ++ read_safe(loader, hparams.n_audio_window_size); ++ read_safe(loader, hparams.n_audio_last_window_layer); + + assert(hparams.n_text_state == hparams.n_audio_state); + +@@ -2114,6 +2118,15 @@ + + struct ggml_tensor * inpL = cur; + ++ struct ggml_tensor * window_mask = nullptr; ++ const int window_size = hparams.n_audio_window_size; ++ const int last_window_layer = hparams.n_audio_last_window_layer; ++ if (window_size > 0 && last_window_layer >= 0) { ++ window_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_ctx, n_ctx, 1); ++ ggml_set_name(window_mask, "window_mask"); ++ ggml_set_input(window_mask); ++ } ++ + for (int il = 0; il < n_layer; ++il) { + const auto & layer = model.layers_encoder[il]; + +@@ -2177,7 +2190,8 @@ + ggml_element_size(kv_pad.v)*n_state_head, + 0); + +- cur = ggml_flash_attn_ext(ctx0, Q, K, V, nullptr, KQscale, 0.0f, 0.0f); ++ struct ggml_tensor * attn_mask_fa = (window_mask && il <= last_window_layer) ? ggml_cast(ctx0, window_mask, GGML_TYPE_F16) : nullptr; // flash-attn kernels read the mask as F16 ++ cur = ggml_flash_attn_ext(ctx0, Q, K, V, attn_mask_fa, KQscale, 0.0f, 0.0f); + + cur = ggml_reshape_2d(ctx0, cur, n_state, n_ctx); + } else { +@@ -2191,7 +2205,8 @@ + // K * Q + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + +- struct ggml_tensor * KQ_soft_max = ggml_soft_max_ext(ctx0, KQ, nullptr, KQscale, 0.0f); ++ struct ggml_tensor * enc_attn_mask = (window_mask && il <= last_window_layer) ? 
window_mask : nullptr; ++ struct ggml_tensor * KQ_soft_max = ggml_soft_max_ext(ctx0, KQ, enc_attn_mask, KQscale, 0.0f); + + struct ggml_tensor * V = + ggml_cast(ctx0, +@@ -2442,6 +2457,25 @@ + return false; + } + ++ { ++ struct ggml_tensor * wmask = ggml_graph_get_tensor(gf, "window_mask"); ++ if (wmask) { ++ const int n_ctx = wstate.exp_n_audio_ctx > 0 ++ ? wstate.exp_n_audio_ctx : wctx.model.hparams.n_audio_ctx; ++ const int ws = wctx.model.hparams.n_audio_window_size; ++ const int half_w = ws / 2; ++ std::vector<float> mask_data(n_ctx * n_ctx); ++ for (int i = 0; i < n_ctx; ++i) { ++ for (int j = 0; j < n_ctx; ++j) { ++ mask_data[i * n_ctx + j] = ++ (abs(i - j) <= half_w) ? 0.0f : -INFINITY; ++ } ++ } ++ ggml_backend_tensor_set(wmask, mask_data.data(), 0, ++ n_ctx * n_ctx * sizeof(float)); ++ } ++ } ++ + if (!ggml_graph_compute_helper(sched, gf, n_threads)) { + return false; + } +@@ -6949,7 +6983,12 @@ + } else { + prompt_init.push_back(whisper_token_transcribe(ctx)); + } +- } ++ } else if (ctx->model.hparams.n_audio_window_size > 0) { ++ const int lang_id = whisper_lang_id(params.language); ++ state->lang_id = lang_id; ++ prompt_init.push_back(whisper_token_lang(ctx, lang_id)); ++ prompt_init.push_back(whisper_token_transcribe(ctx)); ++ } + + // first release distilled models require the "no_timestamps" token + { diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/portfile.cmake b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/portfile.cmake new file mode 100644 index 0000000000..52e171819a --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/portfile.cmake @@ -0,0 +1,57 @@ +set(VERSION "a8d002cfd879315632a579e73f0148d06959de36") + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO ggml-org/whisper.cpp + REF ${VERSION} + SHA512 aea24debb836131d14d362ff78c6d12cfe2e82188340e69e71e6874a1fa51fa9405f2c03fe43888b1ff4183f4288bf64f07dd1106224b0108c3e0f844989a409 + HEAD_REF master + PATCHES + 0001-fix-vcpkg-build.patch + 
0002-fix-apple-silicon-cross-compile.patch + 0003-bci-variable-conv1-kernel.patch + 0004-bci-windowed-attention.patch +) + +set(PLATFORM_OPTIONS) + +if (VCPKG_TARGET_IS_ANDROID) + list(APPEND PLATFORM_OPTIONS -DWHISPER_NO_AVX=ON -DWHISPER_NO_AVX2=ON -DWHISPER_NO_FMA=ON) + list(APPEND PLATFORM_OPTIONS -DGGML_VULKAN=OFF) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + DISABLE_PARALLEL_CONFIGURE + OPTIONS + -DGGML_CCACHE=OFF + -DGGML_OPENMP=OFF + -DGGML_NATIVE=OFF + -DWHISPER_BUILD_TESTS=OFF + -DWHISPER_BUILD_EXAMPLES=OFF + -DWHISPER_BUILD_SERVER=OFF + -DBUILD_SHARED_LIBS=OFF + -DGGML_BUILD_NUMBER=1 + ${PLATFORM_OPTIONS} +) + +vcpkg_cmake_install() + +vcpkg_cmake_config_fixup( + PACKAGE_NAME whisper + CONFIG_PATH share/whisper +) + +vcpkg_fixup_pkgconfig() + +vcpkg_copy_pdbs() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") + +if (VCPKG_LIBRARY_LINKAGE MATCHES "static") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin") +endif() + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/vcpkg.json b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/vcpkg.json new file mode 100644 index 0000000000..ed9210715e --- /dev/null +++ b/packages/bci-whispercpp/vcpkg-overlays/whisper-cpp/vcpkg.json @@ -0,0 +1,18 @@ +{ + "name": "whisper-cpp", + "version": "1.7.5.1", + "port-version": 1, + "description": "Port of OpenAI's Whisper model in C/C++ (BCI patched)", + "homepage": "https://github.com/tetherto/whisper.cpp", + "license": "MIT", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ] +} diff --git a/packages/bci-whispercpp/vcpkg.json b/packages/bci-whispercpp/vcpkg.json new file mode 100644 index 0000000000..867b85f130 --- /dev/null +++ b/packages/bci-whispercpp/vcpkg.json @@ 
-0,0 +1,18 @@ +{ + "name": "bci-whispercpp", + "version-string": "0.1.0", + "dependencies": [ + { + "name": "qvac-lib-inference-addon-cpp", + "version>=": "1.1.5" + }, + "whisper-cpp", + "gtest" + ], + "overrides": [ + { + "name": "whisper-cpp", + "version": "1.7.5.1" + } + ] +}