diff --git a/.github/workflows/cpp-tests-classification.yml b/.github/workflows/cpp-tests-classification.yml
new file mode 100644
index 0000000000..6d68bf3856
--- /dev/null
+++ b/.github/workflows/cpp-tests-classification.yml
@@ -0,0 +1,97 @@
+# Builds and runs the C++ tests of the GGML classification package.
+name: CPP Tests (GGML Classification)
+
+on:
+  # Reusable entry point: callers may choose the repository, ref and package
+  # directory to test.
+  workflow_call:
+    inputs:
+      repository:
+        type: string
+        required: false
+        description: 'Repository to checkout'
+      ref:
+        type: string
+        required: false
+        description: 'Git ref to checkout'
+      workdir:
+        type: string
+        required: false
+        default: 'packages/classification-ggml'
+        description: 'Working directory'
+  # Manual entry point: only the package directory can be overridden.
+  workflow_dispatch:
+    inputs:
+      workdir:
+        type: string
+        required: false
+        default: 'packages/classification-ggml'
+        description: 'Working directory'
+
+permissions:
+  contents: read
+  # Needed to request a GitHub OIDC token for the AWS role passed to the
+  # "Setup AWS + Windows CLI" step (secrets.AWS_OIDC_ROLE_ARN).
+  # NOTE(review): the composite action's internals are not visible here; the
+  # secret name indicates OIDC role assumption - confirm.
+  id-token: write
+
+jobs:
+  # Builds and runs the package's C++ tests once per platform/arch pair.
+  test-cpp:
+    name: cpp-tests-${{ matrix.platform }}-${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    environment: release
+
+    strategy:
+      # Let every platform finish even when another one fails.
+      fail-fast: false
+      matrix:
+        include:
+          - { os: ubuntu-22.04, platform: linux, arch: x64 }
+          - { os: macos-15, platform: darwin, arch: arm64 }
+          - { os: windows-2022, platform: win32, arch: x64 }
+
+    env:
+      # Package directory used as working-directory by the npm steps below.
+      WORKDIR: ${{ inputs.workdir }}
+
+    steps:
+      - name: Setup build host
+        uses: tetherto/qvac/.github/actions/setup-build-host@1d9b2165867d03c6edd675e402ee101a5d48a6d8
+        with:
+          arch: ${{ matrix.arch }}
+          platform: ${{ matrix.platform }}
+
+      # Falls back to the triggering repository/ref when no inputs are given.
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          token: ${{ secrets.PAT_TOKEN }}
+          repository: ${{ inputs.repository || github.repository }}
+          ref: ${{ inputs.ref || github.ref }}
+
+      - name: Setup AWS + Windows CLI
+        uses: tetherto/qvac/.github/actions/setup-aws-prebuild@0bbdca93da303a0b1634ba14a89cec085621078d
+        with:
+          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
+
+      - name: Setup vcpkg
+        uses: tetherto/qvac/.github/actions/setup-vcpkg@1d9b2165867d03c6edd675e402ee101a5d48a6d8
+        with:
+          arch: ${{ matrix.arch }}
+          platform: ${{ matrix.platform }}
+        env:
+          MODEL_S3_BUCKET: ${{ secrets.MODEL_S3_BUCKET }}
+
+      - name: Setup Bare tooling
+        uses: tetherto/qvac/.github/actions/setup-bare-tooling@0bbdca93da303a0b1634ba14a89cec085621078d
+
+      # Only the macOS matrix entry needs the Apple Clang setup.
+      - name: Setup Apple Clang
+        if: ${{ matrix.platform == 'darwin' }}
+        uses: tetherto/qvac/.github/actions/setup-apple-clang@1d9b2165867d03c6edd675e402ee101a5d48a6d8
+
+      # The remaining steps all run inside the package directory.
+      - name: Install npm dependencies
+        run: npm install --ignore-scripts
+        working-directory: ${{ env.WORKDIR }}
+
+      - name: Build C++ tests
+        run: npm run test:cpp:build
+        working-directory: ${{ env.WORKDIR }}
+
+      - name: Run C++ tests
+        run: npm run test:cpp:run
+        working-directory: ${{ env.WORKDIR }}
diff --git a/.github/workflows/integration-mobile-test-classification-ggml.yml b/.github/workflows/integration-mobile-test-classification-ggml.yml
new file mode 100644
index 0000000000..1b4362a6b0
--- /dev/null
+++ b/.github/workflows/integration-mobile-test-classification-ggml.yml
@@ -0,0 +1,1451 @@
+# Builds the mobile test app around the classification addon and prepares it
+# for end-to-end runs on AWS Device Farm.
+name: Mobile Integration Tests (GGML Classification)
+
+on:
+  # Manual entry point: tests a published npm package.
+  workflow_dispatch:
+    inputs:
+      ref:
+        type: string
+        required: false
+        default: main
+        description: 'Git ref (branch/tag/SHA) to test'
+      package:
+        type: string
+        required: true
+        default: '@qvac/classification-ggml@latest'
+        description: 'Full NPM package spec to test (default: @qvac/classification-ggml@latest)'
+  # Reusable entry point: no `package` input exists here, so the steps that
+  # test `inputs.package` take the artifact-download path instead.
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: false
+        description: 'Git ref to checkout'
+      repository:
+        type: string
+        required: false
+        description: 'Repository to checkout'
+
+env:
+  NODE_VERSION: 'lts/*'
+  ADDON_NAME: '@qvac/classification-ggml'
+  # Matches `classification-ggml-${platform}-${arch}` artifact names produced by
+  # prebuilds-classification-ggml.yml (via reusable-prebuilds.yml)
+  PREBUILD_ARTIFACT_PREFIX: 'classification-ggml-'
+  # Branch/tag of qvac-test-addon-mobile framework
+  TEST_FRAMEWORK_REF: 'main'
+  # Bundle ID for the test app (same for all addons)
+  APP_BUNDLE_ID: 'io.tether.test.qvac'
+  # Addon package directory inside the `addon` checkout path.
+  ADDON_WORKDIR: 'addon/packages/classification-ggml'
+
+jobs:
+  # One job per mobile platform; each builds the test app and drives the
+  # Device Farm upload steps below.
+  build-and-test:
+    name: Build ${{ matrix.platform }} and Run E2E Tests
+    runs-on: ${{ matrix.runner }}
+    environment: release
+    timeout-minutes: 120
+    continue-on-error: true # Don't block PR merges if tests fail
+    permissions:
+      contents: read
+      packages: read
+      # Used by the "Configure AWS credentials via OIDC" step.
+      id-token: write
+      pull-requests: write # Allow commenting on PRs
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { platform: Android, os: ubuntu-24.04, runner: ai-run-linux }
+          - { platform: iOS, os: macos-14, runner: macos-14 }
+
+    steps:
+      # Rejects manually dispatched package specs outside the @qvac scope.
+      - name: Validate Dispatch Inputs
+        if: github.event_name == 'workflow_dispatch' && github.event.inputs.package
+        env:
+          # Handed over through the environment instead of being interpolated
+          # into the script, so the user-supplied value is never parsed as
+          # shell code.
+          PACKAGE_SPEC: ${{ github.event.inputs.package }}
+        run: |
+          if [[ ! "$PACKAGE_SPEC" =~ ^@qvac/ ]]; then
+            echo "::error::Invalid package scope. Only @qvac/* is allowed."
+            exit 1
+          fi
+
+      # Reclaims disk space on the Android (Linux) runner to prevent
+      # "No space left on device" errors later in the job.
+      - name: Free up disk space
+        if: matrix.platform == 'Android'
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Every removal is best-effort (|| true) so self-hosted runners that
+          # lack a path or sudo rights do not fail the step.
+          for unused_path in \
+            /usr/share/dotnet \
+            /opt/ghc \
+            /opt/hostedtoolcache/CodeQL \
+            /opt/hostedtoolcache/go \
+            /opt/hostedtoolcache/Python \
+            /opt/hostedtoolcache/Ruby \
+            /usr/local/lib/android/sdk/ndk \
+            /usr/local/share/boost \
+            /usr/share/swift; do
+            sudo rm -rf "$unused_path" || true
+          done
+          sudo docker image prune --all --force || true
+          # Clean APT cache
+          sudo apt-get clean || true
+          echo "Disk space after cleanup:"
+          df -h
+
+      # The addon lands in ./addon; repository/ref fall back to the triggering
+      # ones when the corresponding inputs are empty.
+      - name: Checkout addon repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          path: addon
+          fetch-depth: 0
+          token: ${{ secrets.PAT_TOKEN }}
+          repository: ${{ inputs.repository || github.repository }}
+          ref: ${{ inputs.ref || github.ref }}
+
+      # The mobile test framework lands in ./test-framework.
+      - name: Checkout mobile test framework
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          path: test-framework
+          fetch-depth: 0
+          token: ${{ secrets.PAT_TOKEN }}
+          repository: tetherto/qvac-test-addon-mobile
+          ref: ${{ env.TEST_FRAMEWORK_REF }}
+
+      # Node version comes from the workflow-level NODE_VERSION variable.
+      - name: Setup Node.js
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # 6.3.0
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      # Expo CLI is installed globally, with install scripts disabled.
+      - name: Install global dependencies
+        run: |
+          echo "Installing global dependencies..."
+          npm install -g @expo/cli@latest --ignore-scripts
+
+      # Artifact path (used when no npm package spec was supplied): download the
+      # platform's prebuild artifacts into a staging directory, then copy them
+      # into the addon's prebuilds/ directory. The copy is best-effort.
+      - name: Download Android prebuilds (from artifacts)
+        if: matrix.platform == 'Android' && !inputs.package
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
+        with:
+          pattern: ${{ env.PREBUILD_ARTIFACT_PREFIX }}android-*
+          path: ${{ runner.temp }}/prebuilds-android-staging
+          merge-multiple: true
+
+      - name: Move Android prebuilds from staging
+        if: matrix.platform == 'Android' && !inputs.package
+        continue-on-error: true
+        shell: bash
+        run: |
+          mkdir -p ${{ env.ADDON_WORKDIR }}/prebuilds
+          cp -r ${{ runner.temp }}/prebuilds-android-staging/* ${{ env.ADDON_WORKDIR }}/prebuilds/ 2>/dev/null || true
+
+      - name: Download iOS prebuilds (from artifacts)
+        if: matrix.platform == 'iOS' && !inputs.package
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
+        with:
+          pattern: ${{ env.PREBUILD_ARTIFACT_PREFIX }}ios-*
+          path: ${{ runner.temp }}/prebuilds-ios-staging
+          merge-multiple: true
+
+      - name: Move iOS prebuilds from staging
+        if: matrix.platform == 'iOS' && !inputs.package
+        continue-on-error: true
+        shell: bash
+        run: |
+          mkdir -p ${{ env.ADDON_WORKDIR }}/prebuilds
+          cp -r ${{ runner.temp }}/prebuilds-ios-staging/* ${{ env.ADDON_WORKDIR }}/prebuilds/ 2>/dev/null || true
+
+      # Package path: fetch the published npm tarball and lift its prebuilds/
+      # directory into the addon working directory.
+      - name: Download prebuilds from package
+        if: inputs.package
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        env:
+          # Handed over through the environment so the user-supplied spec is
+          # never interpolated into (and parsed as part of) the script.
+          PACKAGE_SPEC: ${{ inputs.package }}
+        run: |
+          echo "📦 Downloading $PACKAGE_SPEC from npm for manual trigger..."
+
+          # Drop a trailing "@<version>" to get the bare package name. A spec
+          # without a version would otherwise collapse to an empty string
+          # because its only "@" is the scope prefix.
+          PACKAGE_NAME="${PACKAGE_SPEC%@*}"
+          PACKAGE_NAME="${PACKAGE_NAME:-$PACKAGE_SPEC}"
+
+          if ! npm pack "$PACKAGE_SPEC" --ignore-scripts; then
+            echo "ERROR: Failed to download $PACKAGE_SPEC from npm"
+            echo "Please check that the package exists at https://www.npmjs.com/package/$PACKAGE_NAME"
+            exit 1
+          fi
+
+          # Extract the tarball (pattern matches any addon name)
+          tar -xzf *.tgz
+
+          # Validate prebuilds directory exists
+          if [ ! -d "package/prebuilds" ]; then
+            echo "ERROR: No prebuilds directory found in package"
+            echo "The downloaded package may not contain prebuilt binaries"
+            exit 1
+          fi
+
+          # Move prebuilds to expected location
+          mv package/prebuilds ./prebuilds
+
+          # Cleanup
+          rm -rf package *.tgz
+
+          echo "✅ Prebuilds downloaded from npm:"
+          ls -la prebuilds/
+
+      # Fails the job when prebuilds/ is missing or empty, then runs the
+      # optional mobile:copy-prebuilds npm script (its failure is tolerated).
+      - name: Verify and prepare prebuilds
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        run: |
+          echo "Checking for prebuilds..."
+          echo "Current directory: $(pwd)"
+          if [ -d "prebuilds" ] && [ "$(ls -A prebuilds)" ]; then
+            echo "✅ Prebuilds found from artifacts:"
+            ls -la prebuilds/
+          else
+            echo "❌ ERROR: No prebuilds found!"
+            echo " This workflow requires prebuilds to be available."
+            echo " Either:"
+            echo " 1. Run this workflow after prebuild job completes"
+            echo " 2. Or commit prebuilds to the repository"
+            exit 1
+          fi
+
+          # Copy mobile prebuilds if needed
+          if npm run mobile:copy-prebuilds 2>/dev/null; then
+            echo "✅ Mobile prebuilds prepared"
+          else
+            echo "⚠️ mobile:copy-prebuilds script not available or failed"
+          fi
+
+      # Desktop binaries are not needed for mobile tests; dropping them frees
+      # disk space. Sizes are printed before and after for the job log.
+      - name: Remove desktop prebuilds to save disk space
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        run: |
+          echo "Removing desktop prebuilds to save disk space (keeping Android + iOS)..."
+          echo "Before cleanup:"
+          du -sh prebuilds/* 2>/dev/null || true
+
+          # Best-effort removal of every desktop platform directory.
+          for desktop_platform in darwin win32 linux; do
+            rm -rf prebuilds/${desktop_platform}-* 2>/dev/null || true
+          done
+
+          echo "After cleanup (Android + iOS only):"
+          du -sh prebuilds/* 2>/dev/null || true
+          df -h
+
+      # Requires test/mobile/ to exist and contain at least one .cjs file;
+      # test/mobile/testAssets is reported but optional.
+      - name: Verify test files exist
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        run: |
+          echo "Verifying addon has mobile tests..."
+
+          if [ ! -d "test/mobile" ]; then
+            echo "❌ ERROR: test/mobile directory not found!"
+            echo ""
+            echo "This workflow requires the addon to have mobile tests at:"
+            echo " test/mobile/"
+            echo ""
+            echo "Please create this directory with your test files."
+            echo "See qvac-test-addon-mobile README for test file format."
+            exit 1
+          fi
+
+          # Check for .cjs test files
+          CJS_COUNT=$(find test/mobile -name "*.cjs" -type f | wc -l)
+          if [ "$CJS_COUNT" -eq 0 ]; then
+            echo "❌ ERROR: No .cjs test files found in test/mobile!"
+            exit 1
+          fi
+
+          echo "✅ Mobile test files found:"
+          ls -la test/mobile/*.cjs
+
+          # Check if testAssets exists
+          if [ -d "test/mobile/testAssets" ]; then
+            echo ""
+            echo "✅ Test assets found:"
+            ls -lah test/mobile/testAssets/
+          else
+            echo ""
+            echo "ℹ️ No testAssets directory (this is optional)"
+          fi
+
+      # Installs Ninja through Homebrew on the iOS (macOS) runner only.
+      - name: Install Ninja build tool
+        if: matrix.platform == 'iOS'
+        run: |
+          echo "📦 Installing Ninja build system..."
+          brew install ninja
+          ninja --version
+          echo "✅ Ninja installed successfully"
+
+      # Both steps run inside the addon package directory.
+      - name: Install addon dependencies
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        run: |
+          echo "Installing addon dependencies..."
+          npm install --ignore-scripts
+
+      # Delegates to the package's own test:mobile:validate npm script.
+      - name: Validate mobile tests are up-to-date
+        run: npm run test:mobile:validate
+        working-directory: ${{ env.ADDON_WORKDIR }}
+
+      # Produces dist/*.tgz: tries the package's build:pack script first and
+      # falls back to a plain `npm pack` into dist/.
+      - name: Pack addon
+        working-directory: ${{ env.ADDON_WORKDIR }}
+        run: |
+          echo "Packing addon..."
+          if npm run build:pack 2>/dev/null; then
+            echo "✅ Addon packed using build:pack script"
+          else
+            echo "📦 Using npm pack directly..."
+            mkdir -p dist
+            npm pack --pack-destination dist --ignore-scripts
+          fi
+
+          # Verify pack file exists
+          PACK_FILE=$(ls dist/*.tgz | head -1)
+          if [ -f "$PACK_FILE" ]; then
+            SIZE=$(du -h "$PACK_FILE" | cut -f1)
+            echo "✅ Pack file created: $PACK_FILE (Size: $SIZE)"
+          else
+            echo "❌ Pack file not found in dist/"
+            exit 1
+          fi
+
+      # Installs the test framework's npm dependencies (install scripts off).
+      - name: Setup test framework dependencies
+        working-directory: test-framework
+        run: |
+          echo "Setting up mobile test framework..."
+          npm install --ignore-scripts
+          echo "✅ Test framework dependencies installed"
+
+      # Runs the framework's build script against the addon and its
+      # test/mobile directory, then checks that the expected generated files
+      # exist before printing the extracted test configuration.
+      - name: Build test app with addon
+        working-directory: test-framework
+        run: |
+          echo "Building test app with addon..."
+          echo "This will:"
+          echo " 1. Install the addon package"
+          echo " 2. Extract test code from addon's test/mobile/ directory"
+          echo " 3. Auto-detect and order test files by dependencies"
+          echo " 4. Generate backend.cjs with test functions"
+          echo " 5. Generate e2e tests for each test function"
+          echo " 6. Copy testAssets if available"
+          echo " 7. Bundle the app"
+          echo ""
+
+          ADDON_PATH="${GITHUB_WORKSPACE}/${{ env.ADDON_WORKDIR }}"
+          npm run build "$ADDON_PATH" "$ADDON_PATH/test/mobile"
+
+          echo ""
+          echo "✅ Test app built successfully"
+
+          # Verify critical files were generated
+          if [ ! -f "backend/backend.cjs" ]; then
+            echo "❌ ERROR: backend/backend.cjs was not generated!"
+            exit 1
+          fi
+
+          if [ ! -f "e2e/tests/app.test.js" ]; then
+            echo "❌ ERROR: e2e/tests/app.test.js was not generated!"
+            exit 1
+          fi
+
+          if [ ! -f "backend/app.bundle" ]; then
+            echo "❌ ERROR: backend/app.bundle was not created!"
+            exit 1
+          fi
+
+          echo "✅ All required files generated successfully"
+
+          # Show what tests were extracted
+          echo ""
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          echo "EXTRACTED TEST FUNCTIONS:"
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          if [ -f "app/testConfig.js" ]; then
+            cat app/testConfig.js
+          fi
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+      # Always runs (even after a failed build) and prints which generated
+      # files are present.
+      - name: Display build summary
+        if: always()
+        working-directory: test-framework
+        env:
+          # User-supplied on manual runs; kept out of the script text so it is
+          # only ever expanded as data.
+          PACKAGE_SPEC: ${{ inputs.package }}
+        run: |
+          # Prints a check mark when the given file exists, a cross otherwise.
+          file_mark() {
+            [ -f "$1" ] && echo '✅' || echo '❌'
+          }
+
+          echo ""
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          echo "📊 BUILD SUMMARY"
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          echo ""
+          echo "Platform: ${{ matrix.platform }}"
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            echo "Package: $PACKAGE_SPEC"
+          else
+            echo "Addon: ${{ env.ADDON_NAME }} (from PR artifacts)"
+          fi
+          echo ""
+          echo "Generated Files:"
+          echo " backend/backend.cjs: $(file_mark backend/backend.cjs)"
+          echo " backend/app.bundle: $(file_mark backend/app.bundle)"
+          echo " app/testConfig.js: $(file_mark app/testConfig.js)"
+          echo " app/assetManifest.js: $(file_mark app/assetManifest.js)"
+          echo " e2e/tests/app.test.js: $(file_mark e2e/tests/app.test.js)"
+          echo ""
+          echo "Test Assets:"
+          if [ -d "testAssets" ]; then
+            ASSET_COUNT=$(find testAssets -type f | wc -l)
+            echo " ✅ $ASSET_COUNT file(s) in testAssets/"
+          else
+            echo " ℹ️ No testAssets (optional)"
+          fi
+          echo ""
+          echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+      # ---- Android-only steps ----
+      - name: Set up JDK 17
+        if: matrix.platform == 'Android'
+        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # 5.2.0
+        with:
+          distribution: temurin
+          java-version: 17
+
+      - name: Setup Android SDK
+        if: matrix.platform == 'Android'
+        uses: android-actions/setup-android@9fc6c4e9069bf8d3d10b2204b1fb8f6ef7065407 # 3.2.2
+
+      # Regenerates the native android/ project from scratch (--clean).
+      - name: Generate Android project
+        if: matrix.platform == 'Android'
+        working-directory: test-framework
+        run: |
+          echo "Generating Android project with Expo..."
+          npx expo prebuild --platform android --clean
+
+      # Bundles the JavaScript, builds a release APK for arm64-v8a and exports
+      # its absolute path, app type and upload name as step outputs.
+      - name: Build Android APK
+        if: matrix.platform == 'Android'
+        id: build_apk
+        working-directory: test-framework
+        run: |
+          echo "Building Android APK for Device Farm..."
+          export JAVA_HOME=$JAVA_HOME_17_X64
+
+          # Bundle JavaScript. The command is tested directly: run scripts
+          # execute under `bash -e`, so a separate `$?` check after a failing
+          # command would never be reached and the message never printed.
+          echo "Bundling JavaScript code..."
+          if ! npm run bundle; then
+            echo "❌ Bundle failed"
+            exit 1
+          fi
+
+          echo "✅ Bundle completed successfully"
+
+          # Build RELEASE APK (not debug) to ensure JS bundle is included
+          # Debug builds skip bundling by default and try to connect to Metro
+          # Release builds embed the JS bundle in the APK
+          cd android
+          echo "Building APK with Gradle (RELEASE with embedded JS bundle)..."
+          ./gradlew assembleRelease \
+            -PreactNativeArchitectures=arm64-v8a \
+            --no-daemon \
+            --no-build-cache \
+            --stacktrace
+          cd ..
+
+          # Find the APK (look for release)
+          APK_PATH=$(find android/app/build/outputs/apk -name "*.apk" | grep "release" | grep -v "unaligned" | head -1)
+
+          if [ -f "$APK_PATH" ]; then
+            # Convert to absolute path
+            APK_ABSOLUTE_PATH="${GITHUB_WORKSPACE}/test-framework/$APK_PATH"
+            SIZE=$(du -h "$APK_PATH" | cut -f1)
+            echo "✅ APK built successfully: $APK_PATH (Size: $SIZE)"
+            echo "apk_path=$APK_ABSOLUTE_PATH" >> "$GITHUB_OUTPUT"
+            echo "app_type=ANDROID_APP" >> "$GITHUB_OUTPUT"
+            echo "app_name=test-app-${{ matrix.platform }}.apk" >> "$GITHUB_OUTPUT"
+
+            # Clean up build intermediates to free disk space
+            echo "Cleaning up build intermediates..."
+            rm -rf android/app/build/intermediates
+            rm -rf android/.gradle
+            df -h
+          else
+            echo "❌ APK file not found"
+            echo "Searching in android/app/build/outputs/apk:"
+            find android/app/build/outputs/apk -type f 2>/dev/null || echo "Directory not found"
+            exit 1
+          fi
+
+      # iOS-specific steps
+      # Selects the first installed Xcode among 16.1, 16.1.0 and 16.2, and
+      # fails when none of them is present.
+      - name: Set up Xcode version
+        if: matrix.platform == 'iOS'
+        run: |
+          echo "Available Xcode versions:"
+          ls /Applications | grep Xcode || echo "No Xcode apps found"
+
+          echo ""
+          echo "Current Xcode (before switch):"
+          xcodebuild -version
+
+          # React Native requires Xcode >= 16.1
+          # Use Xcode 16.1 (has iOS 18.1 SDK which is stable and pre-installed)
+          if [ -d "/Applications/Xcode_16.1.app" ]; then
+            echo ""
+            echo "✅ Switching to Xcode 16.1..."
+            sudo xcode-select -s /Applications/Xcode_16.1.app
+          elif [ -d "/Applications/Xcode_16.1.0.app" ]; then
+            echo ""
+            echo "✅ Switching to Xcode 16.1.0..."
+            sudo xcode-select -s /Applications/Xcode_16.1.0.app
+          elif [ -d "/Applications/Xcode_16.2.app" ]; then
+            echo ""
+            echo "⚠️ Using Xcode 16.2 (16.1 not found)..."
+            sudo xcode-select -s /Applications/Xcode_16.2.app
+          else
+            echo ""
+            echo "❌ ERROR: No suitable Xcode version found (need >= 16.1)"
+            exit 1
+          fi
+
+          echo ""
+          echo "Current Xcode (after switch):"
+          xcodebuild -version
+
+          echo ""
+          echo "Available iOS SDKs:"
+          xcodebuild -showsdks | grep -i ios
+
+      # Installs CocoaPods system-wide and prints the installed version.
+      - name: Install CocoaPods
+        if: matrix.platform == 'iOS'
+        run: |
+          sudo gem install cocoapods
+          pod --version
+
+      # Decodes the signing certificate and provisioning profile from secrets,
+      # imports the certificate into a temporary keychain, installs the
+      # profile under its UUID, and exports PP_UUID for later steps.
+      - name: Create Keychain and Import Certificate
+        if: matrix.platform == 'iOS'
+        env:
+          BUILD_CERTIFICATE_BASE64: ${{ secrets.TEST_APP_APPLE_DISTRIBUTION_CERTIFICATE }}
+          P12_PASSWORD: ${{ secrets.APPLE_P12_PASSWORD }}
+          BUILD_PROVISION_PROFILE_BASE64: ${{ secrets.TEST_APP_APPLE_PROVISIONING_PROFILE }}
+          KEYCHAIN_PASSWORD: ${{ secrets.APPLE_KEYCHAIN_PASSWORD }}
+        run: |
+          # All paths are quoted below so a runner temp directory containing
+          # spaces cannot split them into several arguments.
+          CERTIFICATE_PATH="$RUNNER_TEMP/build_certificate.p12"
+          PP_PATH="$RUNNER_TEMP/build_pp.mobileprovision"
+          KEYCHAIN_PATH="$RUNNER_TEMP/app-signing.keychain-db"
+
+          echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o "$CERTIFICATE_PATH"
+          echo -n "$BUILD_PROVISION_PROFILE_BASE64" | base64 --decode -o "$PP_PATH"
+
+          security create-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
+          security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH"
+          security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
+
+          security import "$CERTIFICATE_PATH" -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k "$KEYCHAIN_PATH"
+          security set-key-partition-list -S apple-tool:,apple: -s -k "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
+          security list-keychain -d user -s "$KEYCHAIN_PATH"
+
+          # Extract UUID first, then copy with UUID as filename. The here-string
+          # is quoted so the decoded plist reaches PlistBuddy unmodified.
+          PP_UUID=$(/usr/libexec/PlistBuddy -c 'Print :UUID' /dev/stdin <<< "$(security cms -D -i "$PP_PATH")")
+          echo "PP_UUID=$PP_UUID" >> "$GITHUB_ENV"
+          echo "Provisioning Profile UUID: $PP_UUID"
+
+          # Copy provisioning profile with UUID as filename
+          mkdir -p ~/Library/MobileDevice/Provisioning\ Profiles
+          cp "$PP_PATH" ~/Library/MobileDevice/Provisioning\ Profiles/"$PP_UUID".mobileprovision
+
+          security find-identity -p codesigning -v
+
+      # Decodes the installed provisioning profile, derives the export method
+      # from its contents (exported as EXPORT_METHOD for the IPA export step)
+      # and fails when its bundle ID differs from APP_BUNDLE_ID.
+      - name: Verify provisioning profile
+        if: matrix.platform == 'iOS'
+        run: |
+          echo "🔍 Verifying provisioning profile..."
+          echo "PP_UUID: $PP_UUID"
+
+          PP_FILE=~/Library/MobileDevice/Provisioning\ Profiles/$PP_UUID.mobileprovision
+          if [ ! -f "$PP_FILE" ]; then
+            echo "❌ Provisioning profile file not found at: $PP_FILE"
+            ls -la ~/Library/MobileDevice/Provisioning\ Profiles/
+            exit 1
+          fi
+
+          echo "📋 Provisioning Profile Details:"
+          security cms -D -i "$PP_FILE" > /tmp/profile.plist
+
+          PROFILE_NAME=$(/usr/libexec/PlistBuddy -c "Print :Name" /tmp/profile.plist 2>/dev/null || echo "Unknown")
+          PROFILE_BUNDLE_ID=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:application-identifier" /tmp/profile.plist 2>/dev/null || echo "Unknown")
+          PROFILE_TEAM_ID=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:com.apple.developer.team-identifier" /tmp/profile.plist 2>/dev/null || echo "Unknown")
+
+          # Detect profile type (Development, Ad Hoc, App Store, Enterprise)
+          # Only the exit status matters for ProvisionedDevices. Capturing the
+          # command's stdout as well would store the printed device array
+          # followed by "yes", which never compares equal to "yes" below.
+          if /usr/libexec/PlistBuddy -c "Print :ProvisionedDevices" /tmp/profile.plist >/dev/null 2>&1; then
+            HAS_DEVICES="yes"
+          else
+            HAS_DEVICES="no"
+          fi
+          PROVISIONS_ALL=$(/usr/libexec/PlistBuddy -c "Print :ProvisionsAllDevices" /tmp/profile.plist 2>/dev/null || echo "false")
+          HAS_GET_TASK_ALLOW=$(/usr/libexec/PlistBuddy -c "Print :Entitlements:get-task-allow" /tmp/profile.plist 2>/dev/null || echo "false")
+
+          if [[ "$PROVISIONS_ALL" == "true" ]]; then
+            PROFILE_TYPE="Enterprise"
+            EXPORT_METHOD="enterprise"
+          elif [[ "$HAS_DEVICES" == "yes" && "$HAS_GET_TASK_ALLOW" == "true" ]]; then
+            PROFILE_TYPE="Development"
+            EXPORT_METHOD="development"
+          elif [[ "$HAS_DEVICES" == "yes" && "$HAS_GET_TASK_ALLOW" == "false" ]]; then
+            PROFILE_TYPE="Ad Hoc"
+            EXPORT_METHOD="ad-hoc"
+          else
+            PROFILE_TYPE="App Store"
+            EXPORT_METHOD="app-store"
+          fi
+
+          echo " Name: $PROFILE_NAME"
+          echo " Type: $PROFILE_TYPE"
+          echo " Export Method: $EXPORT_METHOD"
+          echo " Application ID: $PROFILE_BUNDLE_ID"
+          echo " Team ID: $PROFILE_TEAM_ID"
+          echo " Expected Bundle ID: ${{ env.APP_BUNDLE_ID }}"
+
+          # Save export method for next step
+          echo "EXPORT_METHOD=$EXPORT_METHOD" >> "$GITHUB_ENV"
+
+          # Extract just the bundle ID part (remove team prefix)
+          BUNDLE_ID_ONLY=$(echo "$PROFILE_BUNDLE_ID" | sed 's/^[^.]*\.//')
+
+          if [[ "$BUNDLE_ID_ONLY" != "${{ env.APP_BUNDLE_ID }}" ]]; then
+            echo ""
+            echo "❌ ERROR: Provisioning profile bundle ID mismatch!"
+            echo " Profile has: $BUNDLE_ID_ONLY"
+            echo " Expected: ${{ env.APP_BUNDLE_ID }}"
+            echo ""
+            echo "The provisioning profile was created for a different bundle identifier."
+            echo "Please create a new provisioning profile for: ${{ env.APP_BUNDLE_ID }}"
+            exit 1
+          fi
+
+          echo "✅ Provisioning profile matches expected bundle ID"
+
+      # Regenerates the native ios/ project from scratch (--clean).
+      - name: Generate iOS project
+        if: matrix.platform == 'iOS'
+        working-directory: test-framework
+        run: |
+          echo "Generating iOS project with Expo..."
+          npx expo prebuild --platform ios --clean
+
+      # Runs inside the generated ios/ directory.
+      - name: Install iOS dependencies
+        if: matrix.platform == 'iOS'
+        working-directory: test-framework/ios
+        run: |
+          echo "Installing CocoaPods dependencies..."
+          pod install --repo-update
+
+      # Bundles the JavaScript, detects the Xcode scheme and archives a
+      # manually signed Release build into $RUNNER_TEMP/<scheme>.xcarchive.
+      - name: Build and Archive iOS App
+        if: matrix.platform == 'iOS'
+        id: build_ios
+        working-directory: test-framework
+        run: |
+          echo "Building iOS app for Device Farm..."
+
+          # Bundle JavaScript first. The command is tested directly: run
+          # scripts execute under `bash -e`, so a separate `$?` check after a
+          # failing command would never be reached.
+          echo "Bundling JavaScript code..."
+          if ! npm run bundle; then
+            echo "❌ Bundle failed"
+            exit 1
+          fi
+
+          echo "✅ Bundle completed successfully"
+
+          # Get scheme name
+          cd ios
+          SCHEME_NAME=$(xcodebuild -list | grep -A 1 "Schemes:" | grep -v "Schemes:" | head -1 | xargs)
+          echo "Detected scheme: $SCHEME_NAME"
+
+          # Debug: Check bundle identifier in project
+          echo "🔍 Checking project configuration..."
+          BUNDLE_ID=$(xcodebuild -showBuildSettings -workspace "$SCHEME_NAME.xcworkspace" -scheme "$SCHEME_NAME" -configuration Release -destination "generic/platform=iOS" 2>/dev/null | grep PRODUCT_BUNDLE_IDENTIFIER | head -1 | awk '{print $3}')
+          echo "Bundle Identifier in project: $BUNDLE_ID"
+
+          # A mismatch is only reported here; it does not stop the build.
+          if [[ "$BUNDLE_ID" != "${{ env.APP_BUNDLE_ID }}" ]]; then
+            echo "⚠️ Warning: Bundle ID mismatch in Xcode project!"
+            echo " Expected: ${{ env.APP_BUNDLE_ID }}"
+            echo " Found: $BUNDLE_ID"
+          fi
+
+          # Debug: Check provisioning profile
+          echo "📋 Provisioning profile UUID: $PP_UUID"
+          security cms -D -i ~/Library/MobileDevice/Provisioning\ Profiles/"$PP_UUID".mobileprovision | grep -A 5 "application-identifier\|Name\|TeamIdentifier" | head -20 || echo "Could not read profile details"
+
+          # Archive for iOS device
+          xcodebuild -workspace "$SCHEME_NAME.xcworkspace" \
+            -scheme "$SCHEME_NAME" \
+            -sdk iphoneos \
+            -configuration Release \
+            -destination "generic/platform=iOS" \
+            -archivePath "$RUNNER_TEMP/$SCHEME_NAME.xcarchive" \
+            CODE_SIGN_STYLE=Manual \
+            PROVISIONING_PROFILE_SPECIFIER="$PP_UUID" \
+            CODE_SIGN_IDENTITY="Apple Distribution" \
+            DEVELOPMENT_TEAM="${{ secrets.APPLE_TEAM_ID }}" \
+            clean archive
+
+      # Writes an ExportOptions.plist for the detected export method, exports
+      # the archive to an IPA and publishes its path, app type and upload name
+      # as step outputs.
+      - name: Export IPA
+        if: matrix.platform == 'iOS'
+        id: export_ipa
+        working-directory: test-framework/ios
+        run: |
+          SCHEME_NAME=$(xcodebuild -list | grep -A 1 "Schemes:" | grep -v "Schemes:" | head -1 | xargs)
+
+          # Create export options using auto-detected export method
+          # The EXPORT_METHOD was determined in the "Verify provisioning profile" step
+          echo "📦 Using export method: $EXPORT_METHOD"
+
+          # The heredoc delimiter is unquoted on purpose so $EXPORT_METHOD and
+          # $PP_UUID are expanded into the plist.
+          EXPORT_OPTS_PATH="$RUNNER_TEMP/ExportOptions.plist"
+          cat > "$EXPORT_OPTS_PATH" << EOF
+          <?xml version="1.0" encoding="UTF-8"?>
+          <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+          <plist version="1.0">
+          <dict>
+            <key>method</key>
+            <string>$EXPORT_METHOD</string>
+            <key>teamID</key>
+            <string>${{ secrets.APPLE_TEAM_ID }}</string>
+            <key>signingStyle</key>
+            <string>manual</string>
+            <key>provisioningProfiles</key>
+            <dict>
+              <key>${{ env.APP_BUNDLE_ID }}</key>
+              <string>$PP_UUID</string>
+            </dict>
+          </dict>
+          </plist>
+          EOF
+
+          echo "📋 Export options:"
+          cat "$EXPORT_OPTS_PATH"
+
+          xcodebuild -exportArchive \
+            -archivePath "$RUNNER_TEMP/$SCHEME_NAME.xcarchive" \
+            -exportOptionsPlist "$EXPORT_OPTS_PATH" \
+            -exportPath "$RUNNER_TEMP/build"
+
+          # The output key is named apk_path on both platforms so the upload
+          # step can read it the same way.
+          IPA_FILE=$(find "$RUNNER_TEMP/build" -name "*.ipa" | head -1)
+          if [ -f "$IPA_FILE" ]; then
+            echo "✅ IPA exported: $IPA_FILE"
+            echo "apk_path=$IPA_FILE" >> "$GITHUB_OUTPUT"
+            echo "app_type=IOS_APP" >> "$GITHUB_OUTPUT"
+            echo "app_name=test-app-${{ matrix.platform }}.ipa" >> "$GITHUB_OUTPUT"
+          else
+            echo "❌ IPA file not found"
+            exit 1
+          fi
+
+      # Assumes the Device Farm role for both platforms.
+      - name: Configure AWS credentials via OIDC
+        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # 6.0.0
+        with:
+          aws-region: us-west-2
+          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 7200 # 2hrs for device farm tests
+
+      # Registers the built app with AWS Device Farm, uploads the binary to the
+      # returned URL and polls until Device Farm has processed it. Exposes the
+      # upload ARN as the app_upload_arn step output.
+      - name: Upload App to Device Farm
+        id: upload_app
+        run: |
+          if [ "${{ matrix.platform }}" == "Android" ]; then
+            APP_PATH="${{ steps.build_apk.outputs.apk_path }}"
+            APP_TYPE="${{ steps.build_apk.outputs.app_type }}"
+            APP_NAME="${{ steps.build_apk.outputs.app_name }}"
+          else
+            APP_PATH="${{ steps.export_ipa.outputs.apk_path }}"
+            APP_TYPE="${{ steps.export_ipa.outputs.app_type }}"
+            APP_NAME="${{ steps.export_ipa.outputs.app_name }}"
+          fi
+
+          # Commands are tested directly: run scripts execute under `bash -e`,
+          # so a separate `$?` check after a failing command is never reached.
+          echo "📤 Uploading app to AWS Device Farm..."
+          if ! UPLOAD_RESPONSE=$(aws devicefarm create-upload \
+            --project-arn "${{ secrets.LLM_AWS_DEVICE_FARM_PROJECT_ARN }}" \
+            --name "$APP_NAME" \
+            --type "$APP_TYPE" \
+            --output json); then
+            echo "❌ Error creating upload in Device Farm"
+            echo "Response: $UPLOAD_RESPONSE"
+            exit 1
+          fi
+
+          APP_UPLOAD_URL=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.url')
+          APP_UPLOAD_ARN=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.arn')
+          echo "app_upload_arn=$APP_UPLOAD_ARN" >> "$GITHUB_OUTPUT"
+          echo "App upload ARN: $APP_UPLOAD_ARN"
+
+          # --fail makes curl return non-zero on an HTTP error response, which
+          # it otherwise reports as success.
+          echo "Uploading app file: $APP_PATH"
+          if ! curl --fail -T "$APP_PATH" "$APP_UPLOAD_URL"; then
+            echo "❌ Error uploading app file using curl"
+            exit 1
+          fi
+
+          # Wait for processing
+          echo "⏳ Waiting for upload to be processed..."
+          MAX_ATTEMPTS=30
+          ATTEMPT=1
+          while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+            STATUS=$(aws devicefarm get-upload --arn "$APP_UPLOAD_ARN" --query "upload.status" --output text)
+            echo "Status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS"
+
+            if [ "$STATUS" = "SUCCEEDED" ]; then
+              echo "✅ App upload successful"
+              break
+            fi
+
+            if [ "$STATUS" = "FAILED" ]; then
+              echo "❌ Upload failed"
+              aws devicefarm get-upload --arn "$APP_UPLOAD_ARN"
+              exit 1
+            fi
+
+            sleep 10
+            ATTEMPT=$((ATTEMPT + 1))
+          done
+
+          # Without this check an upload that never finishes processing would
+          # fall through and let the step succeed.
+          if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+            echo "❌ Timeout waiting for app upload processing"
+            exit 1
+          fi
+
+      # Confirms the generated e2e package has its package.json and
+      # tests/app.test.js, then lists its contents.
+      - name: Verify test package generation
+        working-directory: test-framework/e2e
+        run: |
+          echo "Verifying e2e test package..."
+
+          if [ ! -f "package.json" ]; then
+            echo "❌ ERROR: e2e/package.json not found!"
+            exit 1
+          fi
+
+          if [ ! -f "tests/app.test.js" ]; then
+            echo "❌ ERROR: e2e/tests/app.test.js not found!"
+            exit 1
+          fi
+
+          echo "✅ E2E test files verified"
+          echo ""
+          echo "Test package contents:"
+          ls -la
+          echo ""
+          echo "Test files:"
+          ls -la tests/
+
+      # Zips the e2e tests (package.json, tests/ and the npm tarball), uploads
+      # the zip to AWS Device Farm as an Appium Node test package and polls
+      # until it is processed. Exposes the upload ARN as the
+      # test_package_upload_arn step output.
+      - name: Package and Upload Test Package
+        id: upload_test_package
+        working-directory: test-framework
+        run: |
+          echo "📦 Packaging e2e tests..."
+          cd e2e
+
+          # Install dependencies before packing
+          npm install --ignore-scripts
+
+          # Create tarball
+          npm pack --ignore-scripts
+
+          # Create zip with test files only (no node_modules - will be installed on Device Farm)
+          ZIP_NAME="e2e-tests-${{ matrix.platform }}.zip"
+          zip -r "$ZIP_NAME" \
+            package.json \
+            tests/ \
+            *.tgz
+
+          echo "📦 Package contents (excluding node_modules):"
+          unzip -l "$ZIP_NAME" | head -20
+
+          # Verify zip was created
+          if [ ! -f "$ZIP_NAME" ]; then
+            echo "❌ ERROR: Failed to create test package zip"
+            exit 1
+          fi
+
+          SIZE=$(du -h "$ZIP_NAME" | cut -f1)
+          echo "✅ Test package created: $ZIP_NAME (Size: $SIZE)"
+
+          mv "$ZIP_NAME" "$GITHUB_WORKSPACE/"
+
+          # Upload test package to AWS Device Farm. Commands are tested
+          # directly: run scripts execute under `bash -e`, so a separate `$?`
+          # check after a failing command is never reached.
+          echo "📤 Uploading test package to AWS Device Farm..."
+          if ! UPLOAD_RESPONSE=$(aws devicefarm create-upload \
+            --project-arn "${{ secrets.LLM_AWS_DEVICE_FARM_PROJECT_ARN }}" \
+            --name "$ZIP_NAME" \
+            --type "APPIUM_NODE_TEST_PACKAGE" \
+            --output json); then
+            echo "❌ Error creating test package upload in Device Farm"
+            echo "Response: $UPLOAD_RESPONSE"
+            exit 1
+          fi
+
+          TEST_UPLOAD_URL=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.url')
+          TEST_UPLOAD_ARN=$(echo "$UPLOAD_RESPONSE" | jq -r '.upload.arn')
+          echo "test_package_upload_arn=$TEST_UPLOAD_ARN" >> "$GITHUB_OUTPUT"
+          echo "Test package upload ARN: $TEST_UPLOAD_ARN"
+
+          # --fail makes curl return non-zero on an HTTP error response, which
+          # it otherwise reports as success.
+          echo "Uploading to: $TEST_UPLOAD_URL"
+          if ! curl --fail -T "$GITHUB_WORKSPACE/$ZIP_NAME" "$TEST_UPLOAD_URL"; then
+            echo "❌ Error uploading test package using curl"
+            exit 1
+          fi
+
+          # Wait for processing
+          echo "⏳ Waiting for test package to be processed..."
+          MAX_ATTEMPTS=30
+          ATTEMPT=1
+          while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+            STATUS=$(aws devicefarm get-upload --arn "$TEST_UPLOAD_ARN" --query "upload.status" --output text)
+            echo "Test package status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS"
+
+            if [ "$STATUS" = "SUCCEEDED" ]; then
+              echo "✅ Test package upload successful"
+              break
+            fi
+
+            if [ "$STATUS" = "FAILED" ]; then
+              echo "❌ Test package upload failed"
+              aws devicefarm get-upload --arn "$TEST_UPLOAD_ARN"
+              exit 1
+            fi
+
+            sleep 10
+            ATTEMPT=$((ATTEMPT + 1))
+          done
+
+          # The loop only leaves ATTEMPT above the limit when no terminal
+          # status was seen.
+          if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+            echo "❌ Timeout waiting for test package processing"
+            exit 1
+          fi
+
+ # NOTE: The steps below are carried over unchanged from the workflow this file
+ # was adapted from. The only monorepo-specific change in this file is that
+ # addon operations target addon/packages/classification-ggml via env.ADDON_WORKDIR.
+
+      - name: Create and Upload Test Spec
+        id: upload_test_spec
+        run: |
+          echo "📝 Creating test spec for custom environment mode..."
+          echo "Platform: ${{ matrix.platform }}"
+
+          # Select the platform-specific Appium driver, Device Farm test host and
+          # WebdriverIO config.
+          #
+          # Each WDIO_CONFIG is a minified JavaScript module and MUST stay on one
+          # physical line: a raw newline inside one of its double-quoted JS strings
+          # makes the generated config unparsable.
+          #
+          # Both configs define the same hooks:
+          #   before    - checks queryAppState for crashes, waits for "INITIALIZED",
+          #               then clicks "Run Automated Tests" (single Appium session,
+          #               so there is no session handoff)
+          #   after     - best-effort pull of generated images into tests/artifacts
+          #   afterTest - re-checks the app state after every test
+          # bail:0 keeps going on test failures; a detected crash calls process.exit(1).
+          # mochaOpts.timeout is 30 minutes (1800000ms) for long-running LLM tests.
+          if [ "${{ matrix.platform }}" == "Android" ]; then
+            AUTOMATION="UiAutomator2"
+            HOST_LINE="android_test_host: amazon_linux_2"
+            WDIO_CONFIG='exports.config={runner:"local",hostname:"127.0.0.1",port:4723,path:"/wd/hub",specs:["*.spec.js","*.test.js"],maxInstances:1,bail:0,capabilities:[{platformName:"Android","appium:automationName":"UiAutomator2","appium:appPackage":"'${{ env.APP_BUNDLE_ID }}'","appium:appActivity":"'${{ env.APP_BUNDLE_ID }}'.MainActivity","appium:newCommandTimeout":300,"appium:autoGrantPermissions":true,"appium:autoAcceptAlerts":true,"appium:noReset":true,"appium:dontStopAppOnReset":true,"appium:forceAppLaunch":false}],logLevel:"debug",waitforTimeout:120000,connectionRetryTimeout:30000,connectionRetryCount:3,services:[],framework:"mocha",reporters:["spec"],mochaOpts:{ui:"bdd",timeout:1800000},before:async function(capabilities,specs,browser){const BUNDLE_ID="'${{ env.APP_BUNDLE_ID }}'";global.appCrashed=false;global.checkAppCrash=async(stage)=>{try{const state=await browser.queryAppState(BUNDLE_ID);console.log("["+stage+"] App state: "+state+" (4=foreground,3=background,1=not running)");if(state<3){console.error("\\n💀 APP CRASHED at "+stage+"! State="+state);console.error("Check device logs for BareKit/native errors.\\n");global.appCrashed=true;process.exit(1);}return state;}catch(e){console.log("["+stage+"] queryAppState error: "+e.message);return-1;}};console.log("Checking initial app state...");await global.checkAppCrash("startup");console.log("Waiting for app to initialize...");await browser.pause(5000);await global.checkAppCrash("after-pause");const initText=await browser.$("android=new UiSelector().textContains(\"INITIALIZED\")");await initText.waitForDisplayed({timeout:60000});await global.checkAppCrash("after-init");console.log("App initialized, clicking Run Automated Tests...");const button=await browser.$("android=new UiSelector().textContains(\"Run Automated Tests\")");await button.waitForDisplayed({timeout:15000});await button.click();console.log("Button clicked!");await browser.pause(5000);await global.checkAppCrash("after-click");},after:async function(result,capabilities,specs){try{const fs=require("fs");const path=require("path");const artifactDir=path.resolve(process.cwd(),"tests","artifacts");const artifactPath=path.join(artifactDir,"android-generated-images.zip");const remoteDirs=["/sdcard/Download/qvac-generated-images","/storage/emulated/0/Download/qvac-generated-images"];fs.mkdirSync(artifactDir,{recursive:true});if(typeof browser.pullFolder!=="function"){console.log("No Android generated image artifacts collected: browser.pullFolder is not available");return;}let saved=false;for(const remoteDir of remoteDirs){try{console.log("Attempting to pull generated images from "+remoteDir);const folderData=await browser.pullFolder(remoteDir);fs.writeFileSync(artifactPath,Buffer.from(folderData,"base64"));console.log("Saved generated image artifacts to "+artifactPath);saved=true;break;}catch(e){console.log("Could not pull Android generated images from "+remoteDir+": "+e.message);}}if(!saved){console.log("No Android generated image artifacts collected");}}catch(e){console.log("No Android generated image artifacts collected: "+e.message);}},afterTest:async function(test,context,{error}){if(global.appCrashed)return;await global.checkAppCrash("after-test:"+test.title);}};'
+          else
+            AUTOMATION="XCUITest"
+            # iOS 18+ requires macos_sequoia test host (supports iOS 15-26)
+            HOST_LINE="ios_test_host: macos_sequoia"
+            # usePrebuiltWDA uses Device Farm's pre-built WebDriverAgent
+            WDIO_CONFIG='exports.config={runner:"local",hostname:"127.0.0.1",port:4723,path:"/wd/hub",specs:["*.spec.js","*.test.js"],maxInstances:1,bail:0,capabilities:[{platformName:"iOS","appium:automationName":"XCUITest","appium:bundleId":"'${{ env.APP_BUNDLE_ID }}'","appium:newCommandTimeout":300,"appium:noReset":true,"appium:forceAppLaunch":false,"appium:usePrebuiltWDA":true,"appium:wdaLocalPort":8100,"appium:showIOSLog":true,"appium:realDeviceLogger":"/usr/local/lib/node_modules/appium/node_modules/deviceconsole/deviceconsole"}],logLevel:"debug",waitforTimeout:120000,connectionRetryTimeout:30000,connectionRetryCount:3,services:[],framework:"mocha",reporters:["spec"],mochaOpts:{ui:"bdd",timeout:1800000},before:async function(capabilities,specs,browser){const BUNDLE_ID="'${{ env.APP_BUNDLE_ID }}'";global.appCrashed=false;global.checkAppCrash=async(stage)=>{try{const state=await browser.queryAppState(BUNDLE_ID);console.log("["+stage+"] App state: "+state+" (4=foreground,3=background,1=not running)");if(state<3){console.error("\\n💀 APP CRASHED at "+stage+"! State="+state);console.error("Check device logs for BareKit/native errors.\\n");global.appCrashed=true;process.exit(1);}return state;}catch(e){console.log("["+stage+"] queryAppState error: "+e.message);return-1;}};console.log("Checking initial app state...");await global.checkAppCrash("startup");console.log("Waiting for app to initialize...");await browser.pause(5000);await global.checkAppCrash("after-pause");const initText=await browser.$("-ios predicate string:label CONTAINS \"INITIALIZED\"");await initText.waitForDisplayed({timeout:60000});await global.checkAppCrash("after-init");console.log("App initialized, clicking Run Automated Tests...");const button=await browser.$("-ios predicate string:label CONTAINS \"Run Automated Tests\"");await button.waitForDisplayed({timeout:15000});await button.click();console.log("Button clicked!");await browser.pause(5000);await global.checkAppCrash("after-click");},after:async function(result,capabilities,specs){try{const fs=require("fs");const path=require("path");const artifactDir=path.resolve(process.cwd(),"tests","artifacts");const remoteArtifactDir="@'${{ env.APP_BUNDLE_ID }}':documents/test/generated-images/";const artifactPath=path.join(artifactDir,"ios-generated-images.zip");fs.mkdirSync(artifactDir,{recursive:true});if(typeof browser.pullFolder!=="function"){console.log("No iOS generated image artifacts collected: browser.pullFolder is not available");return;}console.log("Attempting to pull generated images from "+remoteArtifactDir);const folderData=await browser.pullFolder(remoteArtifactDir);fs.writeFileSync(artifactPath,Buffer.from(folderData,"base64"));console.log("Saved generated image artifacts to "+artifactPath);}catch(e){console.log("No iOS generated image artifacts collected: "+e.message);}},afterTest:async function(test,context,{error}){if(global.appCrashed)return;await global.checkAppCrash("after-test:"+test.title);}};'
+          fi
+
+          # Base64 encode the wdio config so it can be embedded in the spec as one
+          # opaque token. tr removes any line wrapping added by base64 (macOS base64
+          # has no -w flag to disable it). printf is used instead of echo so that
+          # backslashes in the config are never reinterpreted.
+          WDIO_CONFIG_B64=$(printf '%s\n' "$WDIO_CONFIG" | base64 | tr -d '\n')
+
+          # Write the Device Farm test spec. printf with single-quoted format strings
+          # keeps every $VARIABLE literal so it is expanded on the Device Farm host,
+          # not here. The leading spaces inside each string are significant: they are
+          # the YAML nesting of the generated file (phases > phase > commands > item,
+          # with "- |" bodies indented below their item).
+          {
+            printf 'version: 0.1\n'
+            if [ -n "$HOST_LINE" ]; then
+              printf '%s\n' "$HOST_LINE"
+            fi
+            printf '\n'
+            printf 'phases:\n'
+            printf '  install:\n'
+            printf '    commands:\n'
+            printf '      - echo "Setting up Node.js environment..."\n'
+            printf '      - export NVM_DIR=$HOME/.nvm\n'
+            printf '      - . $NVM_DIR/nvm.sh 2>/dev/null || true\n'
+            printf '      - nvm install 18 2>/dev/null || true\n'
+            printf '      - nvm use 18 2>/dev/null || true\n'
+            printf '      - node --version || echo "Using system node"\n'
+            printf '\n'
+            printf '  pre_test:\n'
+            printf '    commands:\n'
+            printf '      - echo "Setting up test environment..."\n'
+            printf '      - cd $DEVICEFARM_TEST_PACKAGE_PATH\n'
+            printf '      - ls -la\n'
+            printf '      - echo "Installing dependencies (clean install)..."\n'
+            printf '      - rm -rf node_modules package-lock.json 2>/dev/null || true\n'
+            printf '      - npm install --legacy-peer-deps 2>&1\n'
+            printf '      - echo "Verifying wdio installation..."\n'
+            printf '      - ls -la node_modules/.bin/ | grep wdio || echo "wdio not found in .bin"\n'
+            printf '      - node node_modules/@wdio/cli/bin/wdio.js --version || echo "wdio version check failed"\n'
+            printf '      - echo "Creating wdio config for Device Farm..."\n'
+            # The only value expanded at generation time in this phase: the encoded config.
+            printf '      - echo "%s" | base64 -d > tests/wdio.config.devicefarm.js\n' "$WDIO_CONFIG_B64"
+            printf '      - cat tests/wdio.config.devicefarm.js\n'
+
+            # iOS-specific WebDriverAgent configuration (only for iOS platform)
+            if [ "${{ matrix.platform }}" == "iOS" ]; then
+              printf '      - echo "🔧 Configuring WebDriverAgent for iOS..."\n'
+              printf '      - export DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH=$DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH_V9\n'
+              printf '      - echo "WDA Path: $DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH"\n'
+            fi
+
+            printf '      - echo "🚀 Starting Appium server..."\n'
+            printf '      - export APPIUM_BASE_PATH=/wd/hub\n'
+            # Appium is started in the background; its output goes to appium.log.
+            printf '      - |\n'
+            printf '        appium --base-path=$APPIUM_BASE_PATH --log-timestamp \\\n'
+            printf '          --log-no-colors --relaxed-security --default-capabilities \\\n'
+            printf '          "{\\"appium:deviceName\\": \\"$DEVICEFARM_DEVICE_NAME\\", \\\n'
+            printf '          \\"platformName\\": \\"$DEVICEFARM_DEVICE_PLATFORM_NAME\\", \\\n'
+            printf '          \\"appium:app\\": \\"$DEVICEFARM_APP_PATH\\", \\\n'
+            printf '          \\"appium:udid\\":\\"$DEVICEFARM_DEVICE_UDID\\", \\\n'
+            printf '          \\"appium:platformVersion\\": \\"$DEVICEFARM_DEVICE_OS_VERSION\\", \\\n'
+            printf '          \\"appium:chromedriverExecutableDir\\": \\"$DEVICEFARM_CHROMEDRIVER_EXECUTABLE_DIR\\", \\\n'
+            printf '          \\"appium:wdaLocalPort\\": 8100, \\\n'
+            printf '          \\"appium:derivedDataPath\\": \\"$DEVICEFARM_APPIUM_WDA_DERIVED_DATA_PATH\\", \\\n'
+            printf '          \\"appium:usePrebuiltWDA\\": true, \\\n'
+            printf '          \\"appium:automationName\\": \\"%s\\"}" \\\n' "$AUTOMATION"
+            printf '          >> $DEVICEFARM_LOG_DIR/appium.log 2>&1 &\n'
+            printf '      - echo "⏳ Waiting for Appium to be ready (max 30 seconds)..."\n'
+            # Poll the status endpoint once per second; dump appium.log and abort on timeout.
+            printf '      - |\n'
+            printf '        appium_initialization_time=0\n'
+            printf '        until curl --silent --fail "http://0.0.0.0:4723${APPIUM_BASE_PATH}/status"; do\n'
+            printf '          if [[ $appium_initialization_time -gt 30 ]]; then\n'
+            printf '            echo "❌ Appium did not start within 30 seconds. Exiting..."\n'
+            printf '            cat $DEVICEFARM_LOG_DIR/appium.log\n'
+            printf '            exit 1\n'
+            printf '          fi\n'
+            printf '          appium_initialization_time=$((appium_initialization_time + 1))\n'
+            printf '          echo "Waiting for Appium to start on port 4723 (${appium_initialization_time}s/30s)..."\n'
+            printf '          sleep 1\n'
+            printf '        done\n'
+            printf '      - echo "✅ Appium server is ready!"\n'
+            printf '      - curl -s http://0.0.0.0:4723${APPIUM_BASE_PATH}/status || echo "Status check failed"\n'
+            printf '      - echo "ℹ️ Button click handled via WebDriverIO before hook (single session)"\n'
+            printf '\n'
+            printf '  test:\n'
+            printf '    commands:\n'
+            printf '      - echo "🧪 Running WebDriverIO tests..."\n'
+            printf '      - cd $DEVICEFARM_TEST_PACKAGE_PATH\n'
+            printf '      - echo "Verifying Appium is still running..."\n'
+            printf '      - ps aux | grep appium | grep -v grep || echo "⚠️ Appium process not found"\n'
+            printf '      - curl -s http://127.0.0.1:4723/wd/hub/status || echo "⚠️ Appium status check failed"\n'
+
+            printf '      - echo "Starting wdio test execution..."\n'
+            printf '      - node node_modules/@wdio/cli/bin/wdio.js run tests/wdio.config.devicefarm.js\n'
+            printf '\n'
+            printf '  post_test:\n'
+            printf '    commands:\n'
+            printf '      - echo "Test completed"\n'
+            printf '      - cd $DEVICEFARM_TEST_PACKAGE_PATH\n'
+            # Copy anything the wdio after-hook saved into the directory exported as artifacts.
+            printf '      - |\n'
+            printf '        if [ -d tests/artifacts ]; then\n'
+            printf '          mkdir -p "$DEVICEFARM_LOG_DIR/generated-images"\n'
+            printf '          if ls tests/artifacts/* >/dev/null 2>&1; then\n'
+            printf '            cp tests/artifacts/* "$DEVICEFARM_LOG_DIR/generated-images/"\n'
+            printf '            echo "Copied generated image artifacts to $DEVICEFARM_LOG_DIR/generated-images"\n'
+            printf '          else\n'
+            printf '            echo "No generated image artifacts found in tests/artifacts"\n'
+            printf '          fi\n'
+            printf '        else\n'
+            printf '          echo "No tests/artifacts directory found"\n'
+            printf '        fi\n'
+
+            # iOS-specific: Output captured device logs
+            if [ "${{ matrix.platform }}" == "iOS" ]; then
+              printf '      - echo ""\n'
+              printf '      - echo "📱 ========== iOS Device Console Logs =========="\n'
+              printf '      - |\n'
+              printf '        if [ -f "$DEVICEFARM_LOG_DIR/device_console.log" ]; then\n'
+              printf '          echo "Device console log found, showing BareKit output:"\n'
+              printf '          grep -i "bare\|console\|model\|embedding\|test\|error" "$DEVICEFARM_LOG_DIR/device_console.log" || echo "No matching logs found"\n'
+              printf '        else\n'
+              printf '          echo "No device_console.log file found"\n'
+              printf '        fi\n'
+              printf '      - echo ""\n'
+              printf '      - echo "📂 Available log files:"\n'
+              printf '      - ls -lh $DEVICEFARM_LOG_DIR/ || echo "Log directory not accessible"\n'
+            fi
+            printf '\n'
+            printf 'artifacts:\n'
+            printf '  - $DEVICEFARM_LOG_DIR\n'
+          } > testspec.yml
+
+          # Echo the generated spec into the job log for debugging.
+          echo "Generated test spec:"
+          echo "===================="
+          cat testspec.yml
+          echo "===================="
+
+          echo "📤 Uploading test spec to Device Farm..."
+          SPEC_RESPONSE=$(aws devicefarm create-upload \
+            --project-arn "${{ secrets.LLM_AWS_DEVICE_FARM_PROJECT_ARN }}" \
+            --name "testspec.yml" \
+            --type "APPIUM_NODE_TEST_SPEC" \
+            --output json)
+
+          SPEC_UPLOAD_URL=$(echo "$SPEC_RESPONSE" | jq -r '.upload.url')
+          SPEC_UPLOAD_ARN=$(echo "$SPEC_RESPONSE" | jq -r '.upload.arn')
+
+          # jq -r prints the literal string "null" for a missing field.
+          if [ -z "$SPEC_UPLOAD_ARN" ] || [ "$SPEC_UPLOAD_ARN" = "null" ] || \
+             [ -z "$SPEC_UPLOAD_URL" ] || [ "$SPEC_UPLOAD_URL" = "null" ]; then
+            echo "❌ Device Farm did not return an upload URL/ARN for the test spec"
+            exit 1
+          fi
+          echo "test_spec_arn=$SPEC_UPLOAD_ARN" >> "$GITHUB_OUTPUT"
+
+          # --fail makes curl exit non-zero on an HTTP error status; without it a
+          # rejected PUT to the pre-signed URL would look like a successful upload.
+          if ! curl --fail -T testspec.yml "$SPEC_UPLOAD_URL"; then
+            echo "❌ Error uploading test spec using curl"
+            exit 1
+          fi
+
+          # Wait for processing
+          echo "⏳ Waiting for test spec to be processed..."
+          MAX_ATTEMPTS=20
+          ATTEMPT=1
+          while [ "$ATTEMPT" -le "$MAX_ATTEMPTS" ]; do
+            STATUS=$(aws devicefarm get-upload --arn "$SPEC_UPLOAD_ARN" --query "upload.status" --output text)
+            echo "Test spec status (attempt $ATTEMPT/$MAX_ATTEMPTS): $STATUS"
+
+            if [ "$STATUS" = "SUCCEEDED" ]; then
+              echo "✅ Test spec upload successful"
+              break
+            fi
+
+            if [ "$STATUS" = "FAILED" ]; then
+              echo "❌ Test spec upload failed"
+              aws devicefarm get-upload --arn "$SPEC_UPLOAD_ARN"
+              exit 1
+            fi
+
+            sleep 5
+            ATTEMPT=$((ATTEMPT + 1))
+          done
+
+          # ATTEMPT only exceeds MAX_ATTEMPTS when the loop ran out without seeing
+          # SUCCEEDED; fail here instead of scheduling a run with an unprocessed spec.
+          if [ "$ATTEMPT" -gt "$MAX_ATTEMPTS" ]; then
+            echo "❌ Timeout waiting for test spec processing"
+            exit 1
+          fi
+
+      # Schedules the Appium run on the platform's device pool, using the app, test
+      # package and test spec uploaded by earlier steps. Exposes the run ARN as the
+      # `run_arn` step output.
+      - name: Schedule Device Farm Test Run
+        id: schedule_run
+        run: |
+          if [ "${{ matrix.platform }}" == "Android" ]; then
+            POOL_ARN="${{ secrets.LLM_ANDROID_DEVICE_POOL_ARN }}"
+          else
+            POOL_ARN="${{ secrets.LLM_IOS_DEVICE_POOL_ARN }}"
+          fi
+
+          # Set run name based on trigger
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            RUN_NAME="Manual-${{ github.run_number }}-${{ matrix.platform }}"
+          else
+            # Falls back to the run number when the event carries no pull request.
+            RUN_NAME="PR-${{ github.event.pull_request.number || github.run_number }}-${{ matrix.platform }}"
+          fi
+
+          echo "🚀 Scheduling Device Farm test run..."
+          echo "Platform: ${{ matrix.platform }}"
+          echo "Device Pool ARN: $POOL_ARN"
+          echo "Run Name: $RUN_NAME"
+
+          RUN_ARN=$(aws devicefarm schedule-run \
+            --project-arn "${{ secrets.LLM_AWS_DEVICE_FARM_PROJECT_ARN }}" \
+            --device-pool-arn "$POOL_ARN" \
+            --app-arn "${{ steps.upload_app.outputs.app_upload_arn }}" \
+            --name "$RUN_NAME" \
+            --test type=APPIUM_NODE,testPackageArn="${{ steps.upload_test_package.outputs.test_package_upload_arn }}",testSpecArn="${{ steps.upload_test_spec.outputs.test_spec_arn }}" \
+            --query 'run.arn' --output text)
+
+          # `--output text` prints the literal "None" when the queried field is
+          # absent. Later steps are gated on run_arn being set, so never export a
+          # placeholder value.
+          if [ -z "$RUN_ARN" ] || [ "$RUN_ARN" = "None" ]; then
+            echo "❌ Device Farm did not return a run ARN"
+            exit 1
+          fi
+
+          echo "run_arn=$RUN_ARN" >> "$GITHUB_OUTPUT"
+          echo "✅ Test run scheduled: $RUN_ARN"
+
+      # Polls the scheduled run until Device Farm reports COMPLETED (or the wait
+      # budget is exhausted), prints a per-device summary with console links,
+      # exports result/counter outputs, and fails the step unless every device passed.
+      - name: Monitor Test Run
+        id: monitor_run
+        run: |
+          RUN_ARN="${{ steps.schedule_run.outputs.run_arn }}"
+          RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          echo "🔍 Monitoring test run: $RUN_ARN"
+          echo ""
+
+          MAX_WAIT_TIME=7200 # 120 minutes
+          POLL_INTERVAL=30
+          ELAPSED=0
+
+          while true; do
+            # One get-run call per poll: status and result come from the same
+            # snapshot (text output of the two-element list is tab-separated).
+            RUN_STATE=$(aws devicefarm get-run --arn "$RUN_ARN" --query 'run.[status, result]' --output text)
+            STATUS=$(echo "$RUN_STATE" | cut -f1)
+            RESULT=$(echo "$RUN_STATE" | cut -f2)
+
+            echo "⏳ Run status: $STATUS (Result: $RESULT) - Elapsed: ${ELAPSED}s"
+
+            if [[ "$STATUS" == "COMPLETED" ]]; then
+              echo ""
+              echo "✅ Test run completed!"
+              break
+            fi
+
+            if [ "$ELAPSED" -ge "$MAX_WAIT_TIME" ]; then
+              echo ""
+              echo "❌ Timeout: Test run exceeded $MAX_WAIT_TIME seconds"
+              exit 1
+            fi
+
+            sleep "$POLL_INTERVAL"
+            ELAPSED=$((ELAPSED + POLL_INTERVAL))
+          done
+
+          # Get detailed results
+          RUN_DETAILS=$(aws devicefarm get-run --arn "$RUN_ARN" --output json)
+          RESULT=$(echo "$RUN_DETAILS" | jq -r '.run.result')
+          COUNTERS=$(echo "$RUN_DETAILS" | jq -r '.run.counters')
+
+          echo ""
+          echo "$RULE"
+          echo "📊 FINAL TEST RESULTS"
+          echo "$RULE"
+          echo "Result: $RESULT"
+          echo ""
+
+          # Get jobs (one job per device)
+          echo "📱 Fetching detailed test results..."
+          JOBS=$(aws devicefarm list-jobs --arn "$RUN_ARN" --output json)
+
+          echo ""
+          echo "$RULE"
+          echo "📋 YOUR TESTS (excluding Setup/Teardown)"
+          echo "$RULE"
+          echo ""
+
+          DEVICE_COUNT=0
+          USER_TEST_COUNT=0
+          USER_PASSED=0
+          USER_FAILED=0
+          FAILED_TEST_DETAILS=()
+
+          # Extract project ID and run ID from RUN_ARN for console links
+          # RUN_ARN format: arn:aws:devicefarm:us-west-2:ACCOUNT:run:PROJECT_ID/RUN_ID
+          PROJECT_ID=$(echo "$RUN_ARN" | sed -n 's/.*:run:\([^/]*\)\/.*/\1/p')
+          RUN_ID=$(echo "$RUN_ARN" | sed -n 's/.*:run:[^/]*\/\(.*\)/\1/p')
+
+          # Process each device/job; a job counts as passed only when its result is PASSED.
+          for JOB_ARN in $(echo "$JOBS" | jq -r '.jobs[].arn'); do
+            DEVICE_COUNT=$((DEVICE_COUNT + 1))
+            JOB_DETAILS=$(aws devicefarm get-job --arn "$JOB_ARN" --output json)
+            DEVICE_NAME=$(echo "$JOB_DETAILS" | jq -r '.job.device.name // "Unknown Device"')
+            JOB_RESULT=$(echo "$JOB_DETAILS" | jq -r '.job.result // "UNKNOWN"')
+            JOB_ID=$(echo "$JOB_ARN" | sed -n 's/.*:job:[^/]*\/[^/]*\/\(.*\)/\1/p')
+
+            # Build console link (no region param needed when region is in subdomain)
+            CONSOLE_LINK="https://us-west-2.console.aws.amazon.com/devicefarm/home#/mobile/projects/${PROJECT_ID}/runs/${RUN_ID}/jobs/${JOB_ID}"
+
+            if [ "$JOB_RESULT" = "PASSED" ]; then
+              echo "  ✅ $DEVICE_NAME: PASSED"
+              USER_PASSED=$((USER_PASSED + 1))
+            else
+              echo "  ❌ $DEVICE_NAME: $JOB_RESULT"
+              USER_FAILED=$((USER_FAILED + 1))
+              FAILED_TEST_DETAILS+=("❌ $DEVICE_NAME: $JOB_RESULT")
+              FAILED_TEST_DETAILS+=("   🔗 View logs: $CONSOLE_LINK")
+            fi
+
+            USER_TEST_COUNT=$((USER_TEST_COUNT + 1))
+            echo ""
+          done
+
+          # Show AWS Device Farm console link for the entire run
+          echo ""
+          echo "$RULE"
+          echo "🔗 AWS DEVICE FARM LINKS"
+          echo "$RULE"
+          echo ""
+          echo "📊 Full Run Details:"
+          echo "   https://us-west-2.console.aws.amazon.com/devicefarm/home#/mobile/projects/${PROJECT_ID}/runs/${RUN_ID}"
+          echo ""
+          echo "💡 Tip: Click the link above, then select a device to view:"
+          echo "   • Video recording of the test"
+          echo "   • Screenshots"
+          echo "   • Device logs"
+          echo "   • Test spec output (shows individual test results)"
+          echo ""
+
+          # Summary
+          echo "$RULE"
+          echo "📊 SUMMARY"
+          echo "$RULE"
+          echo ""
+          echo "Devices tested: $DEVICE_COUNT"
+          echo "  ✅ Passed: $USER_PASSED"
+          echo "  ❌ Failed: $USER_FAILED"
+          echo ""
+          echo "🔍 What these tests verify:"
+          echo "   The E2E tests run on Device Farm check that your app:"
+          echo "   1. Shows 'INITIALIZED' after startup"
+          echo "   2. Runs all test functions from test/mobile/*.cjs"
+          echo "   3. Reports PASS/FAIL for each test function"
+          echo ""
+          echo "💡 If a test times out but the video shows PASS:"
+          echo "   → The app test passed, but E2E gave up waiting too early"
+          echo "   → Check timeout settings in qvac-test-addon-mobile"
+          echo ""
+          echo "Device Farm Counters (includes Setup/Teardown):"
+          echo "$COUNTERS" | jq '.'
+          echo ""
+
+          if [ ${#FAILED_TEST_DETAILS[@]} -gt 0 ]; then
+            echo "$RULE"
+            echo "❌ FAILED TESTS"
+            echo "$RULE"
+            for failed_info in "${FAILED_TEST_DETAILS[@]}"; do
+              echo "$failed_info"
+            done
+            echo ""
+          fi
+
+          # Save for PR comment. COUNTERS is multi-line JSON, so it is written with
+          # the `name<<DELIMITER` multiline syntax for step outputs.
+          echo "test_result=$RESULT" >> "$GITHUB_OUTPUT"
+          echo "test_counters<<EOF" >> "$GITHUB_OUTPUT"
+          echo "$COUNTERS" >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
+
+          # Extract test counts
+          TOTAL=$(echo "$COUNTERS" | jq -r '.total // 0')
+          PASSED=$(echo "$COUNTERS" | jq -r '.passed // 0')
+          FAILED=$(echo "$COUNTERS" | jq -r '.failed // 0')
+          SKIPPED=$(echo "$COUNTERS" | jq -r '.skipped // 0')
+
+          echo "test_total=$TOTAL" >> "$GITHUB_OUTPUT"
+          echo "test_passed=$PASSED" >> "$GITHUB_OUTPUT"
+          echo "test_failed=$FAILED" >> "$GITHUB_OUTPUT"
+          echo "test_skipped=$SKIPPED" >> "$GITHUB_OUTPUT"
+
+          # Also save user test counts
+          echo "user_test_count=$USER_TEST_COUNT" >> "$GITHUB_OUTPUT"
+          echo "user_test_passed=$USER_PASSED" >> "$GITHUB_OUTPUT"
+          echo "user_test_failed=$USER_FAILED" >> "$GITHUB_OUTPUT"
+
+          # Determine if tests passed or failed
+          # Red status (exit 1) if:
+          #   1. Device Farm overall result is not PASSED, OR
+          #   2. Any device job failed
+          # Green status (exit 0) only if all tests passed
+
+          if [[ "$RESULT" != "PASSED" ]] || [ "$USER_FAILED" -gt 0 ]; then
+            echo ""
+            echo "❌ Device Farm tests failed"
+            if [[ "$RESULT" != "PASSED" ]]; then
+              echo "   Device Farm result: $RESULT"
+            fi
+            echo "   Your tests: $USER_PASSED passed, $USER_FAILED failed (out of $USER_TEST_COUNT total)"
+            echo "   Device Farm total: $TOTAL | Passed: $PASSED | Failed: $FAILED | Skipped: $SKIPPED"
+            exit 1
+          fi
+
+          echo ""
+          echo "✅ All Device Farm tests passed!"
+          echo "   Your tests: $USER_PASSED passed (out of $USER_TEST_COUNT total)"
+          echo "   Device Farm total: $TOTAL | Passed: $PASSED | Failed: $FAILED | Skipped: $SKIPPED"
+
+      # Re-assumes the OIDC role before logs are fetched. Runs even when earlier
+      # steps failed, as long as a Device Farm run was actually scheduled.
+      # NOTE(review): presumably needed because the monitor step can wait up to two
+      # hours, which may outlive the original session - confirm.
+      - name: Refresh AWS credentials for log download
+        if: always() && steps.schedule_run.outputs.run_arn
+        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # 6.0.0
+        with:
+          aws-region: us-west-2
+          role-session-name: device-farm-logs
+          role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
+
+      # Downloads every suite-level (FILE + LOG) and job-level (FILE) artifact of the
+      # run into devicefarm-logs/<platform>/ so it can be published as a workflow
+      # artifact. Best effort: listing or download failures are reported but never
+      # fail the step.
+      - name: Download Device Farm Logs
+        if: always() && steps.schedule_run.outputs.run_arn
+        run: |
+          RUN_ARN="${{ steps.schedule_run.outputs.run_arn }}"
+          LOG_DIR="devicefarm-logs/${{ matrix.platform }}"
+          PLATFORM="${{ matrix.platform }}"
+          RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+          mkdir -p "$LOG_DIR"
+
+          # sanitize NAME
+          # Prints NAME reduced to a file-name-safe token: spaces and slashes become
+          # underscores, anything outside [A-Za-z0-9_-] is dropped.
+          sanitize() {
+            echo "$1" | tr ' /' '__' | tr -cd '[:alnum:]_-'
+          }
+
+          # download_artifacts OWNER_ARN TYPE LEVEL DEST_PREFIX LABEL
+          #   OWNER_ARN    suite or job ARN whose artifacts are listed
+          #   TYPE         Device Farm artifact category: FILE or LOG
+          #   LEVEL        "suite" or "job" (selects the log message wording)
+          #   DEST_PREFIX  path prefix; "_<sanitized artifact name>.<ext>" is appended
+          #   LABEL        suite name (or "job-level") shown in log messages
+          # FILE artifacts that are videos are skipped on Android. Suite-level FILE
+          # artifacts whose name looks like the test spec output are also printed
+          # into a collapsible log group. Reads PLATFORM and DEVICE_NAME from the caller.
+          download_artifacts() {
+            local owner_arn="$1" art_type="$2" level="$3" dest_prefix="$4" label="$5"
+            local listing artifact art_name art_url art_ext dest
+
+            listing=$(aws devicefarm list-artifacts --arn "$owner_arn" --type "$art_type" --output json 2>/dev/null || echo '{"artifacts":[]}')
+
+            echo "$listing" | jq -c '.artifacts[]' 2>/dev/null | while read -r artifact; do
+              art_name=$(echo "$artifact" | jq -r '.name // "unknown"')
+              art_url=$(echo "$artifact" | jq -r '.url // empty')
+              art_ext=$(echo "$artifact" | jq -r '.extension // "txt"')
+              [ -z "$art_url" ] && continue
+
+              if [ "$PLATFORM" = "Android" ] && [ "$art_type" = "FILE" ]; then
+                if echo "$art_name" | grep -qiE "^video$" || echo "$art_ext" | grep -qiE "^mp4$"; then
+                  echo "  Skipped (video): $label / $art_name"
+                  continue
+                fi
+              fi
+
+              dest="${dest_prefix}_$(sanitize "$art_name").${art_ext}"
+
+              if ! curl -fsSL -o "$dest" "$art_url" 2>/dev/null; then
+                # Previously silent; surface it so a missing log is explainable.
+                echo "  ⚠️ Failed to download: $label / $art_name"
+                continue
+              fi
+
+              if [ "$level" = "job" ]; then
+                echo "  Downloaded (job-level): $art_name"
+              elif [ "$art_type" = "LOG" ]; then
+                echo "  Downloaded: $label / $art_name (LOG)"
+              else
+                echo "  Downloaded: $label / $art_name"
+
+                if echo "$art_name" | grep -qiE "test.spec|testspec"; then
+                  echo ""
+                  echo "::group::📋 [$DEVICE_NAME] $label — $art_name"
+                  cat "$dest" 2>/dev/null || true
+                  echo "::endgroup::"
+                fi
+              fi
+            done
+          }
+
+          echo ""
+          echo "$RULE"
+          echo "📥 DOWNLOADING DEVICE FARM LOGS"
+          echo "$RULE"
+          echo ""
+          echo "Logs are downloaded so anyone with repo access can view them"
+          echo "without needing AWS Device Farm credentials."
+          if [ "$PLATFORM" = "Android" ]; then
+            echo "ℹ️ Skipping video artifacts on Android to reduce artifact size."
+          fi
+          echo ""
+
+          RUN_DETAILS=$(aws devicefarm get-run --arn "$RUN_ARN" --output json 2>/dev/null || echo '{}')
+          RUN_LABEL=$(echo "$RUN_DETAILS" | jq -r '.run.name // "unknown"')
+          echo ""
+          echo "========================================"
+          echo "📦 Run: $RUN_LABEL"
+          echo "========================================"
+
+          SAFE_RUN=$(sanitize "$RUN_LABEL")
+          JOBS=$(aws devicefarm list-jobs --arn "$RUN_ARN" --output json 2>/dev/null || echo '{"jobs":[]}')
+
+          for JOB_ARN in $(echo "$JOBS" | jq -r '.jobs[].arn'); do
+            DEVICE_NAME=$(echo "$JOBS" | jq -r --arg arn "$JOB_ARN" '.jobs[] | select(.arn == $arn) | .device.name // "unknown"')
+            JOB_RESULT=$(echo "$JOBS" | jq -r --arg arn "$JOB_ARN" '.jobs[] | select(.arn == $arn) | .result // "UNKNOWN"')
+            SAFE_NAME=$(sanitize "$DEVICE_NAME")
+
+            echo ""
+            echo "$RULE"
+            echo "📱 $DEVICE_NAME ($JOB_RESULT)"
+            echo "$RULE"
+
+            SUITES=$(aws devicefarm list-suites --arn "$JOB_ARN" --output json 2>/dev/null || echo '{"suites":[]}')
+
+            for SUITE_ARN in $(echo "$SUITES" | jq -r '.suites[].arn'); do
+              SUITE_NAME=$(echo "$SUITES" | jq -r --arg arn "$SUITE_ARN" '.suites[] | select(.arn == $arn) | .name // "unknown"')
+              SUITE_PREFIX="$LOG_DIR/${SAFE_RUN}_${SAFE_NAME}_$(sanitize "$SUITE_NAME")"
+
+              download_artifacts "$SUITE_ARN" FILE suite "$SUITE_PREFIX" "$SUITE_NAME"
+              download_artifacts "$SUITE_ARN" LOG suite "$SUITE_PREFIX" "$SUITE_NAME"
+            done
+
+            download_artifacts "$JOB_ARN" FILE job "$LOG_DIR/${SAFE_RUN}_${SAFE_NAME}_job" "job-level"
+          done
+
+          echo ""
+          echo "$RULE"
+          echo "📦 All downloaded logs:"
+          echo "$RULE"
+          # find exits 0 even when it matches nothing, so test for output explicitly.
+          if [ -n "$(find "$LOG_DIR" -type f 2>/dev/null | head -n 1)" ]; then
+            find "$LOG_DIR" -type f -exec ls -lh {} \;
+          else
+            echo "  (no logs downloaded)"
+          fi
+
+      # Publishes whatever the download step collected; an empty directory is not an error.
+      - name: Upload Device Farm Logs
+        if: always() && steps.schedule_run.outputs.run_arn
+        # Pinned to a commit SHA like every other action in these workflows; a
+        # mutable tag such as @v4 can be repointed upstream.
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+        with:
+          name: devicefarm-logs-classification-ggml-${{ matrix.platform }}
+          path: devicefarm-logs/
+          retention-days: 30
+          if-no-files-found: ignore
diff --git a/.github/workflows/integration-test-classification-ggml.yml b/.github/workflows/integration-test-classification-ggml.yml
new file mode 100644
index 0000000000..43c076ea61
--- /dev/null
+++ b/.github/workflows/integration-test-classification-ggml.yml
@@ -0,0 +1,178 @@
+name: Integration Tests (GGML Classification)
+
+# Runs either as a reusable workflow (workflow_call) or manually from the
+# Actions tab (workflow_dispatch). Every input is optional.
+on:
+  workflow_call:
+    inputs:
+      ref:
+        type: string
+        required: false
+        description: 'ref'
+      repository:
+        type: string
+        required: false
+        default: 'tetherto/qvac'
+      workdir:
+        type: string
+        required: false
+        default: 'packages/classification-ggml'
+        description: 'Working directory inside the repo (monorepo package path)'
+      prebuild_package:
+        type: string
+        required: false
+        description: 'NPM package containing prebuilds (e.g. @qvac/classification-ggml@0.1.0). When set, prebuilds are downloaded from npm instead of from per-PR build artifacts.'
+  workflow_dispatch:
+    inputs:
+      prebuild_package:
+        type: string
+        required: false
+        description: 'NPM package containing prebuilds (e.g. @qvac/classification-ggml@0.1.0)'
+
+env:
+  # Package directory used by every step. Honors the `workdir` input of
+  # workflow_call; manual dispatch has no such input, so the expression falls
+  # back to the default package path.
+  PKG_DIR: ${{ inputs.workdir || 'packages/classification-ggml' }}
+
+jobs:
+  run-integration-tests:
+    name: ${{ matrix.platform }}-${{ matrix.arch }}-integration-tests
+    runs-on: ${{ matrix.os }}
+    environment: release
+    timeout-minutes: 30
+    permissions:
+      id-token: write
+      contents: read
+      packages: read
+    strategy:
+      # Let the remaining targets finish when one of them fails.
+      fail-fast: false
+      matrix:
+        # One entry per runner image; platform/arch drive the job name, the
+        # prebuild artifact pattern and the Windows-only steps.
+        include:
+          - { os: ubuntu-22.04, platform: linux, arch: x64 }
+          - { os: ubuntu-24.04, platform: linux, arch: x64 }
+          - { os: ubuntu-22.04-arm, platform: linux, arch: arm64 }
+          - { os: macos-14, platform: darwin, arch: arm64 }
+          - { os: windows-2022, platform: win32, arch: x64 }
+
+    steps:
+      - name: Setup Node.js
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # 6.3.0
+        with:
+          node-version: lts/*
+
+      # Runs before the checkout step.
+      # NOTE(review): presumably so that deeply nested repository paths can be
+      # checked out on Windows - confirm.
+      - name: Windows — enable git long paths
+        if: ${{ matrix.platform == 'win32' }}
+        shell: bash
+        run: git config --system core.longpaths true
+
+      # Falls back to the triggering repository/ref when the caller passes none.
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          repository: ${{ inputs.repository || github.repository }}
+          ref: ${{ inputs.ref || github.ref }}
+          token: ${{ secrets.PAT_TOKEN }}
+
+      - name: Setup Bare runtime
+        uses: tetherto/qvac/.github/actions/setup-bare-tooling@0bbdca93da303a0b1634ba14a89cec085621078d
+
+      # --ignore-scripts: no package lifecycle scripts are run during install.
+      - name: Install npm dependencies
+        working-directory: ${{ env.PKG_DIR }}
+        run: npm install --ignore-scripts
+
+      # Default prebuild source: artifacts matching this platform/arch. Skipped
+      # when an npm package is given as the prebuild source instead.
+      - name: Download prebuild artifacts
+        if: ${{ !inputs.prebuild_package }}
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
+        with:
+          pattern: classification-ggml-${{ matrix.platform }}-${{ matrix.arch }}*
+          path: ${{ env.PKG_DIR }}/prebuilds
+          merge-multiple: true
+
+      # Alternative prebuild source: fetch the published npm package and move its
+      # prebuilds/ directory into the package under test.
+      - name: Download prebuilds from package (Linux/macOS)
+        if: ${{ inputs.prebuild_package && matrix.platform != 'win32' }}
+        working-directory: ${{ env.PKG_DIR }}
+        shell: bash
+        env:
+          # Passed through the environment instead of being interpolated into the
+          # script, so the value is only ever treated as data, never as shell code.
+          PREBUILD_PACKAGE: ${{ inputs.prebuild_package }}
+        run: |
+          echo "Downloading $PREBUILD_PACKAGE from npm..."
+
+          # npm pack prints the name of the tarball it wrote as the last line of
+          # stdout; use that instead of globbing *.tgz, which breaks when more than
+          # one tarball is present.
+          if ! PACK_OUTPUT=$(npm pack "$PREBUILD_PACKAGE" --ignore-scripts); then
+            echo "ERROR: Failed to download $PREBUILD_PACKAGE from npm"
+            echo "Please check that the package exists"
+            exit 1
+          fi
+
+          TARBALL=$(printf '%s\n' "$PACK_OUTPUT" | tail -n 1)
+          if [ ! -f "$TARBALL" ]; then
+            echo "ERROR: Could not find downloaded tarball"
+            exit 1
+          fi
+
+          echo "Extracting $TARBALL..."
+          tar -xzf "$TARBALL"
+
+          if [ ! -d "package/prebuilds" ]; then
+            echo "ERROR: No prebuilds directory found in package"
+            echo "The downloaded package may not contain prebuilt binaries"
+            exit 1
+          fi
+
+          mv package/prebuilds ./prebuilds
+          rm -rf package "$TARBALL"
+
+          echo "Prebuilds downloaded from npm successfully"
+          ls -la prebuilds/
+
+      # Windows counterpart of the npm prebuild download step (PowerShell).
+      - name: Download prebuilds from package (Windows)
+        if: ${{ inputs.prebuild_package && matrix.platform == 'win32' }}
+        working-directory: ${{ env.PKG_DIR }}
+        shell: powershell
+        env:
+          # Passed through the environment instead of being interpolated into the
+          # script, so the value is only ever treated as data, never as script code.
+          PREBUILD_PACKAGE: ${{ inputs.prebuild_package }}
+        run: |
+          $PACKAGE = $env:PREBUILD_PACKAGE
+          echo "Downloading $PACKAGE from npm..."
+
+          # npm is a native command: check $LASTEXITCODE explicitly.
+          npm pack $PACKAGE --ignore-scripts
+          if ($LASTEXITCODE -ne 0) {
+            echo "ERROR: Failed to download $PACKAGE from npm"
+            echo "Please check that the package exists"
+            exit 1
+          }
+
+          $TARBALL = Get-ChildItem -Filter "*.tgz" | Select-Object -First 1 -ExpandProperty Name
+          if (-not $TARBALL) {
+            echo "ERROR: Could not find downloaded tarball"
+            exit 1
+          }
+
+          echo "Extracting $TARBALL..."
+          tar -xzf $TARBALL
+
+          if (-not (Test-Path "package/prebuilds")) {
+            echo "ERROR: No prebuilds directory found in package"
+            echo "The downloaded package may not contain prebuilt binaries"
+            exit 1
+          }
+
+          Move-Item package/prebuilds ./prebuilds
+          Remove-Item -Recurse -Force package
+          Remove-Item $TARBALL
+
+          echo "Prebuilds downloaded from npm successfully"
+          Get-ChildItem prebuilds/
+
+      - name: Run JS integration tests
+        env:
+          # Turns on the C++ per-inference trace: raw logits, probabilities and
+          # sorted results are logged for every classification. This is what makes
+          # platform-specific numerical issues debuggable from CI logs; the extra
+          # output stays at a few lines per sample image.
+          QVAC_CLASSIFICATION_TRACE: '1'
+        working-directory: ${{ env.PKG_DIR }}
+        run: npm run test:integration
+
+      # Always attempted, including after a test failure; a missing report file
+      # is not treated as an error.
+      - name: Upload performance report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2
+        with:
+          name: classification-perf-report-${{ matrix.platform }}-${{ matrix.arch }}
+          path: ${{ env.PKG_DIR }}/test/results/performance-report.json
+          retention-days: 30
+          if-no-files-found: ignore
diff --git a/.github/workflows/on-merge-classification-ggml.yml b/.github/workflows/on-merge-classification-ggml.yml
new file mode 100644
index 0000000000..f43dc2d1bc
--- /dev/null
+++ b/.github/workflows/on-merge-classification-ggml.yml
@@ -0,0 +1,218 @@
+name: On Merge Trigger (Classification-ggml)
+
+# Triggered by pushes to the listed branch families that touch the package or
+# its workflows, and by manual dispatch.
+on:
+  push:
+    branches: [main, 'release-*', 'feature-*', 'tmp-*']
+    paths:
+      - 'packages/classification-ggml/**'
+      - '.github/workflows/*classification-ggml*.yml'
+  workflow_dispatch:
+    inputs:
+      # Forwarded to the publish-logic job as the GitHub Packages tag.
+      tag:
+        description: 'Tag to publish with'
+        type: choice
+        options: [latest, dev]
+        default: 'dev'
+        required: false
+      # Used by publish-release-npm; an empty value falls back to `latest`.
+      npm_tag:
+        description: 'NPM dist-tag (default: latest). e.g. release-1.x'
+        type: string
+        default: ''
+        required: false
+
+permissions:
+  id-token: write
+  contents: read
+  packages: read
+  pull-requests: read
+
+jobs:
+  # Works out, from the branch name and trigger, which publish path applies
+  # and which GitHub Packages tag to use. At most one publish_* output is
+  # "true"; all of them stay "false" for any other branch or event.
+  publish-logic:
+    runs-on: ubuntu-latest
+    outputs:
+      gpr_tag: ${{ steps.logic.outputs.gpr_tag }}
+      publish_main: ${{ steps.logic.outputs.publish_main }}
+      publish_release: ${{ steps.logic.outputs.publish_release }}
+      publish_feature: ${{ steps.logic.outputs.publish_feature }}
+      publish_tmp: ${{ steps.logic.outputs.publish_tmp }}
+    steps:
+      - id: logic
+        shell: bash
+        env:
+          INPUT_TAG: ${{ inputs.tag }}
+        run: |
+          set -euo pipefail
+          branch="${GITHUB_REF_NAME}"
+          trigger="${GITHUB_EVENT_NAME}"
+
+          publish_main="false"
+          publish_release="false"
+          publish_feature="false"
+          publish_tmp="false"
+
+          # Only push and manual dispatch can select a publish path.
+          case "$trigger" in
+            push|workflow_dispatch)
+              case "$branch" in
+                main)      publish_main="true" ;;
+                release-*) publish_release="true" ;;
+                feature-*) publish_feature="true" ;;
+                tmp-*)     publish_tmp="true" ;;
+              esac
+              ;;
+          esac
+
+          # An explicit tag input wins; otherwise derive the tag from the branch.
+          gpr_tag="${INPUT_TAG}"
+          if [ -z "$gpr_tag" ]; then
+            case "$branch" in
+              feature-*) gpr_tag="feature" ;;
+              tmp-*)     gpr_tag="temp" ;;
+              *)         gpr_tag="dev" ;;
+            esac
+          fi
+
+          {
+            echo "publish_main=$publish_main"
+            echo "publish_release=$publish_release"
+            echo "publish_feature=$publish_feature"
+            echo "publish_tmp=$publish_tmp"
+            echo "gpr_tag=$gpr_tag"
+          } >> "$GITHUB_OUTPUT"
+
+
+  # Runs the repository's release-merge-guard action, on release-* refs only.
+  release-merge-guard:
+    name: Release Merge Guard
+    runs-on: ubuntu-latest
+    if: >-
+      startsWith(github.ref_name, 'release-') &&
+      (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+    steps:
+      # Full history is fetched (fetch-depth: 0).
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          fetch-depth: 0
+      - uses: ./.github/actions/release-merge-guard
+        with:
+          package-slug: classification-ggml
+          package-json-path: packages/classification-ggml/package.json
+          changelog-path: packages/classification-ggml/CHANGELOG.md
+          base-ref: ${{ github.ref_name }}
+          base-sha: ${{ github.event.before }}
+          head-sha: ${{ github.sha }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+
+  # Desktop integration tests against the pushed commit; runs whenever
+  # publish-logic selected any publish path.
+  run-integration-tests:
+    needs: [publish-logic]
+    if: >-
+      needs.publish-logic.outputs.publish_main == 'true' ||
+      needs.publish-logic.outputs.publish_release == 'true' ||
+      needs.publish-logic.outputs.publish_feature == 'true' ||
+      needs.publish-logic.outputs.publish_tmp == 'true'
+    uses: ./.github/workflows/integration-test-classification-ggml.yml
+    with:
+      workdir: packages/classification-ggml
+      repository: ${{ github.repository }}
+      ref: ${{ github.sha }}
+    secrets: inherit
+
+
+  # Mobile integration tests against the pushed commit; same gate as the
+  # desktop integration tests.
+  mobile-integration-tests:
+    needs: [publish-logic]
+    if: >-
+      needs.publish-logic.outputs.publish_main == 'true' ||
+      needs.publish-logic.outputs.publish_release == 'true' ||
+      needs.publish-logic.outputs.publish_feature == 'true' ||
+      needs.publish-logic.outputs.publish_tmp == 'true'
+    permissions:
+      id-token: write
+      contents: read
+      packages: read
+      pull-requests: write
+    uses: ./.github/workflows/integration-mobile-test-classification-ggml.yml
+    with:
+      repository: ${{ github.repository }}
+      ref: ${{ github.sha }}
+    secrets: inherit
+
+
+  # Publishes to GitHub Packages for main, feature-* and tmp-* refs. There is
+  # no status-check function in `if`, so the job also requires every job in
+  # `needs` to have succeeded.
+  publish-gpr:
+    needs:
+      - publish-logic
+      - run-integration-tests
+      - mobile-integration-tests
+    if: >-
+      needs.publish-logic.outputs.publish_main == 'true' ||
+      needs.publish-logic.outputs.publish_feature == 'true' ||
+      needs.publish-logic.outputs.publish_tmp == 'true'
+    environment: release
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          fetch-depth: 0
+
+      - name: Publish to GitHub Packages
+        uses: ./.github/actions/publish-library-to-gpr
+        with:
+          workdir: packages/classification-ggml
+          name-suffix: '-mono'
+          tag: ${{ needs.publish-logic.outputs.gpr_tag }}
+          secret-token: ${{ secrets.GITHUB_TOKEN }}
+          npm-token: ${{ secrets.NPM_TOKEN }}
+
+
+  # Publishes the package to the npm registry for release-* refs, after the
+  # release merge guard has succeeded. Exposes the published version to
+  # downstream jobs as `published_version`.
+  publish-release-npm:
+    needs: [publish-logic, release-merge-guard, run-integration-tests, mobile-integration-tests]
+    # NOTE(review): `!cancelled()` replaces the implicit `success()` check, and
+    # this condition only inspects publish-logic and release-merge-guard. The
+    # results of the two integration-test jobs in `needs` are not checked here;
+    # confirm that publishing despite a test failure is intended.
+    if: |
+      !cancelled() &&
+      needs.publish-logic.outputs.publish_release == 'true' &&
+      needs.release-merge-guard.result == 'success'
+    runs-on: ubuntu-latest
+    environment: release
+    outputs:
+      published_version: ${{ steps.publish.outputs.npm_published_version }}
+    permissions:
+      contents: write
+      packages: write
+      id-token: write
+    steps:
+      - name: Validate npm_tag input
+        if: inputs.npm_tag != ''
+        shell: bash
+        env:
+          # Pass the input through the environment rather than interpolating
+          # it into the script: the value is untrusted until validated and
+          # must never be parsed as shell code.
+          NPM_TAG: ${{ inputs.npm_tag }}
+        run: |
+          tag="${NPM_TAG}"
+          # A bash regex match is anchored to the whole value, so a multi-line
+          # input cannot pass on the strength of a single valid line.
+          pattern='^[a-zA-Z0-9][a-zA-Z0-9._-]*$'
+          if [[ ! "$tag" =~ $pattern ]]; then
+            echo "::error::Invalid npm dist-tag '$tag'. Must match ^[a-zA-Z0-9][a-zA-Z0-9._-]*$ (e.g. release-1.x)"
+            exit 1
+          fi
+
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+
+      - name: Publish to NPM Package Registry
+        id: publish
+        uses: ./.github/actions/publish-library-to-npm
+        with:
+          # An empty npm_tag (for example on push events) falls back to `latest`.
+          tag: ${{ inputs.npm_tag || 'latest' }}
+          workdir: "packages/classification-ggml"
+
+
+  # Creates the GitHub release, but only when the npm publish job succeeded
+  # and reported a non-empty published version.
+  publish-release:
+    needs:
+      - publish-release-npm
+    if: |
+      !cancelled() &&
+      needs.publish-release-npm.outputs.published_version != '' &&
+      needs.publish-release-npm.result == 'success'
+    permissions:
+      contents: write
+    uses: ./.github/workflows/create-github-release.yml
+    with:
+      workdir: packages/classification-ggml
+      repo_name: classification-ggml
+      release_name: 'QVAC GGML Image Classification Lib'
+      prev_sha: ${{ github.event.before }}
+      published_version: ${{ needs.publish-release-npm.outputs.published_version }}
diff --git a/.github/workflows/on-pr-classification-ggml.yml b/.github/workflows/on-pr-classification-ggml.yml
new file mode 100644
index 0000000000..49a3df1821
--- /dev/null
+++ b/.github/workflows/on-pr-classification-ggml.yml
@@ -0,0 +1,192 @@
+name: On PR Trigger (Classification-ggml)
+
+# Triggered by pull requests against the listed branch families that touch
+# the package or its workflows, and by manual dispatch.
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened, labeled]
+    branches: [main, 'release-*', 'feature-*', 'tmp-*']
+    paths:
+      - 'packages/classification-ggml/**'
+      - '.github/workflows/*classification-ggml*.yml'
+  workflow_dispatch:
+
+permissions:
+  id-token: write
+  contents: read
+  packages: read
+  pull-requests: read
+
+# Package directory, referenced by jobs as `env.PKG_DIR`.
+env:
+  PKG_DIR: packages/classification-ggml
+
+jobs:
+  # Runs the repository's authorize-pr action and publishes its `allowed`
+  # output, which every later job in this workflow checks.
+  authorize:
+    runs-on: ubuntu-latest
+    outputs:
+      allowed: ${{ steps.auth.outputs.allowed }}
+    permissions:
+      pull-requests: write
+      contents: read
+    steps:
+      # No ref is passed, so checkout uses its default for this event.
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+      - id: auth
+        name: Authorize
+        uses: ./.github/actions/authorize-pr
+        with:
+          github-token: ${{ github.token }}
+
+
+  # Sets the `pkg` output from a path filter over the package directory and
+  # its workflows. Skipped on manual dispatch, where there is no pull request.
+  changes:
+    runs-on: ubuntu-latest
+    if: github.event_name != 'workflow_dispatch'
+    outputs:
+      pkg: ${{ steps.filter.outputs.pkg }}
+    steps:
+      - id: filter
+        uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # 4.0.1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          filters: |
+            pkg:
+              - "packages/classification-ggml/**"
+              - ".github/workflows/*classification-ggml*.yml"
+
+
+  # Runs for authorized PRs that touch the package, and always on manual
+  # dispatch. `always()` keeps the job from being skipped automatically when
+  # `changes` is skipped.
+  sanity-checks:
+    needs:
+      - authorize
+      - changes
+    if: >-
+      always() &&
+      (
+        (needs.changes.outputs.pkg == 'true' && needs.authorize.outputs.allowed == 'true') ||
+        github.event_name == 'workflow_dispatch'
+      )
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+        with:
+          fetch-depth: 0
+
+      - name: Run Sanity checks
+        uses: ./.github/actions/sanity-checks
+        with:
+          workdir: packages/classification-ggml
+          run-integration: ${{ needs.authorize.outputs.allowed == 'true' }}
+          secret-token: ${{ secrets.GITHUB_TOKEN }}
+          pat-token: ${{ secrets.PAT_TOKEN }}
+
+
+  # Installs dependencies without lifecycle scripts and runs the package's
+  # `test:dts` script.
+  ts-checks:
+    needs:
+      - authorize
+      - changes
+      - sanity-checks
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        # Every `run` step below executes inside the package directory.
+        working-directory: ${{ env.PKG_DIR }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
+      - name: Set up Node.js
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # 6.3.0
+        with:
+          node-version: 20
+      - name: Install dependencies
+        run: npm install --ignore-scripts
+      - name: Type declaration check
+        run: npm run test:dts
+
+
+  # Calls the shared C++ lint workflow with a base SHA and a head SHA.
+  cpp-lint:
+    needs: [authorize, changes, sanity-checks]
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    uses: ./.github/workflows/cpp-lint.yaml
+    with:
+      workdir: packages/classification-ggml
+      # Outside a pull request these fall back to push-event values, then to
+      # literal defaults.
+      sha: ${{ github.event.pull_request.base.sha || github.event.before || 'HEAD~1' }}
+      pr_head_sha: ${{ github.event.pull_request.head.sha || github.sha }}
+    secrets: inherit
+
+
+  # Calls the C++ unit-test workflow against the PR head.
+  cpp-tests:
+    needs: [authorize, changes, sanity-checks]
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    uses: ./.github/workflows/cpp-tests-classification.yml
+    with:
+      # NOTE(review): `head.ref` is a branch name, not a commit, so the tested
+      # commit may be newer than the one that was authorized. Consider
+      # `head.sha` after confirming what the called workflow expects.
+      repository: ${{ github.event.pull_request.head.repo.full_name }}
+      ref: ${{ github.event.pull_request.head.ref }}
+      workdir: packages/classification-ggml
+    secrets: inherit
+
+
+  # Calls the prebuilds workflow for the PR head.
+  prebuild:
+    needs: [authorize, changes, sanity-checks]
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    permissions:
+      id-token: write
+      contents: write
+      packages: write
+      pull-requests: write
+    uses: ./.github/workflows/prebuilds-classification-ggml.yml
+    with:
+      repository: ${{ github.event.pull_request.head.repo.full_name }}
+      ref: ${{ github.event.pull_request.head.ref }}
+    secrets: inherit
+
+
+  # Desktop integration tests for the PR head. Ordered after `prebuild`, but
+  # `always()` means only the authorization and change checks gate the job.
+  run-integration-tests:
+    needs: [authorize, changes, sanity-checks, prebuild]
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    permissions:
+      id-token: write
+      contents: read
+      packages: read
+    uses: ./.github/workflows/integration-test-classification-ggml.yml
+    with:
+      repository: ${{ github.event.pull_request.head.repo.full_name }}
+      ref: ${{ github.event.pull_request.head.ref }}
+    secrets: inherit
+
+
+  # Mobile integration tests for the PR head; same gate as the desktop
+  # integration tests.
+  run-mobile-integration-tests:
+    needs: [authorize, changes, sanity-checks, prebuild]
+    if: >-
+      always() &&
+      needs.authorize.outputs.allowed == 'true' &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    permissions:
+      id-token: write
+      contents: read
+      packages: read
+      pull-requests: write # Allow commenting on PRs
+    uses: ./.github/workflows/integration-mobile-test-classification-ggml.yml
+    with:
+      repository: ${{ github.event.pull_request.head.repo.full_name }}
+      ref: ${{ github.event.pull_request.head.ref }}
+    secrets: inherit
+
+
+  # Collapses the results of all earlier jobs into three booleans and hands
+  # them to the shared public-pr workflow. For the integration tests a
+  # `skipped` result counts as passing; for everything else only `success`.
+  merge-guard:
+    needs:
+      - authorize
+      - changes
+      - sanity-checks
+      - ts-checks
+      - cpp-lint
+      - cpp-tests
+      - prebuild
+      - run-integration-tests
+      - run-mobile-integration-tests
+    if: >-
+      always() &&
+      (needs.changes.outputs.pkg == 'true' || github.event_name == 'workflow_dispatch')
+    uses: ./.github/workflows/public-pr.yml
+    with:
+      build-status: ${{ needs.prebuild.result == 'success' }}
+      sanity-checks-status: ${{ needs.sanity-checks.result == 'success' && needs.ts-checks.result == 'success' && needs.cpp-lint.result == 'success' && needs.cpp-tests.result == 'success' }}
+      integration-tests-status: ${{ (needs.run-integration-tests.result == 'success' || needs.run-integration-tests.result == 'skipped') && (needs.run-mobile-integration-tests.result == 'success' || needs.run-mobile-integration-tests.result == 'skipped') }}
diff --git a/.github/workflows/on-pr-close-classification-ggml.yml b/.github/workflows/on-pr-close-classification-ggml.yml
new file mode 100644
index 0000000000..f2569f568c
--- /dev/null
+++ b/.github/workflows/on-pr-close-classification-ggml.yml
@@ -0,0 +1,66 @@
+name: On PR Close (Classification-ggml)
+
+# Triggered when a pull request touching the package or its workflows is
+# closed, and by manual dispatch with explicit deletion targets.
+on:
+  pull_request:
+    types: [closed]
+    paths:
+      - 'packages/classification-ggml/**'
+      - '.github/workflows/*classification-ggml*.yml'
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Specific version to target for deletion'
+        type: string
+        required: false
+      pr-number:
+        description: 'PR number to target for deletion'
+        type: string
+        required: false
+      pattern:
+        description: 'Pattern to target for deletion'
+        type: string
+        required: false
+      packages:
+        description: 'Packages to target for deletion, space separated'
+        type: string
+        default: 'classification-ggml'
+        required: false
+      dry-run:
+        description: 'Is dry run? If true, lists versions without deleting.'
+        type: boolean
+        default: true
+
+# The run title shows the effective version, PR number and dry-run flag.
+run-name: >-
+  Delete NPM Versions (classification-ggml)
+  v=${{ inputs.version }}
+  pr=${{ github.event_name == 'pull_request' && github.event.pull_request.number || inputs.pr-number }}
+  dry=${{ github.event_name == 'pull_request' && true || inputs.dry-run }}
+
+permissions:
+  contents: read
+
+jobs:
+  # Diagnostic job: prints the `inputs` and `github` contexts to the log.
+  # A failure here is tolerated (continue-on-error).
+  print-context:
+    continue-on-error: true
+    runs-on: ubuntu-latest
+    steps:
+      - env:
+          INPUTS_CONTEXT: ${{ toJSON(inputs) }}
+        run: |
+          echo "Inputs Context: $INPUTS_CONTEXT"
+      - env:
+          GITHUB_CONTEXT: ${{ toJSON(github) }}
+        run: |
+          echo "GitHub Context: $GITHUB_CONTEXT"
+
+
+  # Calls the shared deletion workflow. On a pull_request event the PR number
+  # comes from the event and dry-run evaluates to true; on manual dispatch
+  # both come from the inputs.
+  delete-npm-versions-trigger:
+    permissions:
+      packages: write
+    uses: ./.github/workflows/public-delete-npm-versions.yml
+    with:
+      packages: ${{ inputs.packages || 'classification-ggml' }}
+      version: ${{ inputs.version }}
+      pattern: ${{ inputs.pattern }}
+      pr-number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || inputs.pr-number }}
+      dry-run: ${{ github.event_name == 'pull_request' && true || inputs.dry-run }}
diff --git a/.github/workflows/prebuilds-classification-ggml.yml b/.github/workflows/prebuilds-classification-ggml.yml
new file mode 100644
index 0000000000..d5d304e70b
--- /dev/null
+++ b/.github/workflows/prebuilds-classification-ggml.yml
@@ -0,0 +1,44 @@
+# This workflow only builds; publishing is handled by
+# on-merge-classification-ggml.yml.
+name: Prebuilds (GGML Classification)
+
+on:
+  workflow_dispatch:
+    inputs:
+      workdir:
+        description: 'Working directory (optional)'
+        type: string
+        default: 'packages/classification-ggml'
+        required: false
+
+  workflow_call:
+    inputs:
+      ref:
+        description: 'ref'
+        type: string
+        required: false
+      repository:
+        type: string
+        default: 'tetherto/qvac'
+        required: false
+      workdir:
+        description: 'Working directory (optional)'
+        type: string
+        default: 'packages/classification-ggml'
+        required: false
+
+permissions:
+  contents: read
+
+jobs:
+  # Delegates to the shared reusable-prebuilds workflow, forwarding this
+  # workflow's inputs and fixing the artifact-name prefix for this package.
+  prebuild:
+    permissions:
+      id-token: write
+      contents: write
+      pull-requests: write
+    uses: ./.github/workflows/reusable-prebuilds.yml
+    secrets: inherit
+    with:
+      artifact-name-prefix: classification-ggml-
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref }}
+      workdir: ${{ inputs.workdir }}
diff --git a/packages/classification-ggml/.gitignore b/packages/classification-ggml/.gitignore
new file mode 100644
index 0000000000..1a1004453c
--- /dev/null
+++ b/packages/classification-ggml/.gitignore
@@ -0,0 +1,26 @@
+# Dependencies and lockfile.
+node_modules/
+package-lock.json
+# Build outputs.
+build/
+prebuilds/
+dist/
+# vcpkg working directories.
+vcpkg_installed/
+.vcpkg/
+vcpkg/buildtrees/
+vcpkg/downloads/
+vcpkg/packages/
+vcpkg/installed/
+# Tooling files and logs.
+compile_commands.json
+.clang-format
+.clang-tidy
+.valgrind.supp
+*.log
+
+# Auto-generated performance report from local test runs (CI uploads its own).
+test/results/
+
+# Internal validation set (confidential, must never be committed).
+test/images/internal/
+proposal/dataset_val/
+
+# Device Farm logs downloaded for local diagnostic; never committed.
+remote_logs/
diff --git a/packages/classification-ggml/CHANGELOG.md b/packages/classification-ggml/CHANGELOG.md
new file mode 100644
index 0000000000..f243c3fb73
--- /dev/null
+++ b/packages/classification-ggml/CHANGELOG.md
@@ -0,0 +1,52 @@
+# Changelog
+
+All notable changes to `@qvac/classification-ggml` will be documented in
+this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to
+[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.1.0] — Unreleased
+
+### Added
+
+- Initial release of the GGML image classification addon.
+- `ImageClassifier` public API (`load`, `classify`, `unload`) orchestrated
+ via `@qvac/infer-base`'s `createJobHandler` + `exclusiveRunQueue`,
+ mirroring the lifecycle pattern used by `@qvac/llm-llamacpp`.
+- C++ `ClassificationModel` implementing the MobileNetV3-Small architecture
+ directly against `libggml` (34 conv + 2 linear layers, with depthwise
+ separable convolutions, HardSwish activations, and squeeze-and-excite
+ blocks). BatchNorm is folded into the preceding convolution at load time
+ via `foldBn()` (`eps = 0.001`); the runtime graph evaluates only the
+ resulting scale/shift, with no per-inference BN op.
+- FP16 GGUF weights (2.94 MB) bundled in `weights/` and loaded with
+ `gguf_init_from_file()` + `ggml_backend_tensor_set()`.
+- Image preprocessing pipeline: JPEG / PNG decode via `stb_image`, bilinear
+ resize to 224x224, ImageNet-normalization, WHCN tensor layout.
+- Integration tests (brittle + bare) covering happy path, raw-RGB input,
+ edge cases, and lifecycle errors.
+- C++ unit tests (GoogleTest) covering graph construction, BN epsilon,
+ softmax normalization, and FP16 weight loading.
+- ONNX-to-GGUF conversion guide in `docs/onnx-to-gguf-conversion.md`.
+- `nativeLogger` constructor option (default `false`) that gates the shared
+ native C++→JS logger bridge; off by default because the underlying
+ `qvac-lib-inference-addon-cpp` `JsLogger` singleton's static `uv_async_t`
+ lifecycle is not safe across rapid create/destroy cycles. JS-level
+ logging always routes through the caller's `logger`.
+
+### Removed
+
+- `threads` constructor option. libggml's CPU thread pool now sizes itself
+ to `std::thread::hardware_concurrency` on every platform. The knob was
+ unimplementable on Android (the `ggml_backend_cpu_set_n_threads` symbol
+ lives inside the per-microarch CPU variant `.so` loaded via `dlopen`,
+ not in the addon's statically-linked `.bare`), and exposing it only on
+ desktop / iOS would have produced silently inconsistent behaviour across
+ platforms. Removed for API consistency.
+
+> **Note.** SDK plugin / schema integration (canonical model type
+> `ggml-classification` with `classification` alias) is **out of scope** for
+> 0.1.0 and will land in a follow-up PR; see the PR description for the
+> rationale.
diff --git a/packages/classification-ggml/CMakeLists.txt b/packages/classification-ggml/CMakeLists.txt
new file mode 100644
index 0000000000..0dec551734
--- /dev/null
+++ b/packages/classification-ggml/CMakeLists.txt
@@ -0,0 +1,130 @@
+# Build script for the classification-ggml module (uses cmake-bare and cmake-vcpkg).
+cmake_minimum_required(VERSION 3.25)
+
+# Tests and coverage instrumentation are opt-in.
+option(BUILD_TESTING "Build tests" OFF)
+option(ENABLE_COVERAGE "Enable coverage instrumentation for unit tests" OFF)
+# Request the "tests" vcpkg manifest feature only when tests are built.
+if(BUILD_TESTING)
+ list(APPEND VCPKG_MANIFEST_FEATURES "tests")
+endif()
+
+# Both helper packages are looked up under node_modules/.
+find_package(cmake-bare REQUIRED PATHS node_modules/cmake-bare)
+find_package(cmake-vcpkg REQUIRED PATHS node_modules/cmake-vcpkg)
+
+# Put this package's triplet directory ahead of any overlay triplets already set.
+set(VCPKG_OVERLAY_TRIPLETS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/triplets;${VCPKG_OVERLAY_TRIPLETS}")
+
+# Android STL configuration must be set before project()
+if(DEFINED ENV{ANDROID_NDK} OR DEFINED ENV{ANDROID_NDK_HOME})
+ set(ANDROID_STL c++_shared)
+endif()
+
+project(classification-ggml LANGUAGES C CXX)
+
+# Linux builds compile and link against libc++.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ add_compile_options(-stdlib=libc++)
+ add_link_options(-stdlib=libc++ -static-libstdc++)
+endif()
+
+# Copy the shared lint configuration from the lint-cpp share directory into
+# the source tree. Both copied files are listed in this package's .gitignore.
+find_path(VCPKG_INSTALLED_PATH share/lint-cpp/.clang-format REQUIRED)
+configure_file(${VCPKG_INSTALLED_PATH}/share/lint-cpp/.clang-format
+ ${CMAKE_CURRENT_SOURCE_DIR}/.clang-format COPYONLY)
+configure_file(${VCPKG_INSTALLED_PATH}/share/lint-cpp/.clang-tidy
+ ${CMAKE_CURRENT_SOURCE_DIR}/.clang-tidy COPYONLY)
+
+# C++20 without compiler extensions; position-independent code; export
+# compile_commands.json.
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Windows-only preprocessor definitions applied to every target.
+if(WIN32)
+ add_definitions(-DWIN32_LEAN_AND_MEAN -DNOMINMAX -DNOGDI)
+endif()
+
+# Header-only dependencies, located by a known header (both are required).
+find_path(QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS
+ "inference-addon-cpp/JsInterface.hpp" REQUIRED)
+find_path(STB_INCLUDE_DIRS "stb_image.h" REQUIRED)
+
+find_package(ggml CONFIG REQUIRED)
+
+# BACKENDS_SUBDIR is "<bare target>/<module name>"; it is passed to the
+# sources as a compile definition further down.
+bare_target(bare_target_value)
+bare_module_target("." unused_target NAME module_name VERSION unused_version)
+set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}")
+message(STATUS "Building classification-ggml with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'")
+
+# On non-Apple Android/UNIX platforms, collect the ggml backend targets that
+# exist as "INSTALL TARGET <target>" arguments for add_bare_module().
+set(BACKEND_DL_LIBS "")
+if((ANDROID OR UNIX) AND NOT APPLE)
+ foreach(_backend ${GGML_AVAILABLE_BACKENDS})
+ # GGML_AVAILABLE_BACKENDS advertises every backend the upstream port
+ # knows about, but real CMake targets only exist for those actually
+ # built into the installed package (e.g. ggml-vulkan / ggml-opencl
+ # are absent on Android with our vcpkg feature set). Skip missing
+ # ones so add_bare_module's get_target_property() doesn't error out.
+ if(TARGET ggml::${_backend})
+ list(APPEND BACKEND_DL_LIBS INSTALL TARGET ggml::${_backend})
+ endif()
+ endforeach()
+endif()
+
+add_bare_module(classification-ggml EXPORTS ${BACKEND_DL_LIBS})
+
+# Linux only: link the module with --exclude-libs,ALL.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ target_link_options(${classification-ggml}_module PRIVATE -Wl,--exclude-libs,ALL)
+endif()
+
+# Source files of the addon: the JS binding plus the model implementation.
+set(ADDON_SOURCES
+ ${PROJECT_SOURCE_DIR}/addon/src/js-interface/binding.cpp
+ ${PROJECT_SOURCE_DIR}/addon/src/model-interface/ClassificationModel.cpp
+ ${PROJECT_SOURCE_DIR}/addon/src/model-interface/ImagePreprocessor.cpp
+ ${PROJECT_SOURCE_DIR}/addon/src/model-interface/MobileNetGraph.cpp
+)
+
+target_sources(
+ ${classification-ggml}
+ PRIVATE
+ ${ADDON_SOURCES}
+)
+
+# Include paths: the two header-only dependencies and the addon's own sources.
+target_include_directories(
+ ${classification-ggml}
+ PRIVATE
+ ${QVAC_LIB_INFERENCE_ADDON_CPP_INCLUDE_DIRS}
+ ${STB_INCLUDE_DIRS}
+ ${PROJECT_SOURCE_DIR}/addon/src
+)
+
+target_link_libraries(
+ ${classification-ggml}
+ PRIVATE
+ ggml::ggml
+ ggml::ggml-base
+)
+
+# CPU backend: with GGML_CPU_ALL_VARIANTS=ON (Android via qvac-fabric),
+# the single ggml::ggml-cpu target doesn't exist — it's replaced by
+# per-microarch variants (ggml::ggml-cpu-android_armv8.0_1, ...armv8.2_1,
+# ...armv8.2_2, ...armv8.6_1) loaded as MODULE .so files at runtime via
+# dlopen. On Apple/Linux/Windows desktop where variants are off,
+# ggml::ggml-cpu exists as a single static target.
+if(TARGET ggml::ggml-cpu)
+ target_link_libraries(${classification-ggml} PRIVATE ggml::ggml-cpu)
+endif()
+
+# JS_LOGGER is defined without a value; BACKENDS_SUBDIR is a quoted string.
+target_compile_definitions(
+ ${classification-ggml}
+ PRIVATE
+ JS_LOGGER
+ BACKENDS_SUBDIR="${BACKENDS_SUBDIR_VALUE}"
+)
+
+# Windows only: additionally link msvcrt.lib.
+if(WIN32)
+ target_link_libraries(
+ ${classification-ggml}
+ PRIVATE
+ msvcrt.lib
+ )
+endif()
+
+# Unit tests are only configured when BUILD_TESTING is ON; they need GoogleTest
+# and live in test/unit.
+if(BUILD_TESTING)
+ find_package(GTest CONFIG REQUIRED)
+ include(GoogleTest)
+ enable_testing()
+ add_subdirectory(test/unit)
+endif()
diff --git a/packages/classification-ggml/LICENSE b/packages/classification-ggml/LICENSE
new file mode 100644
index 0000000000..7d199ae333
--- /dev/null
+++ b/packages/classification-ggml/LICENSE
@@ -0,0 +1,179 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+Copyright 2026 Tether Data, S.A. de C.V.
diff --git a/packages/classification-ggml/NOTICE b/packages/classification-ggml/NOTICE
new file mode 100644
index 0000000000..0bb0b704b5
--- /dev/null
+++ b/packages/classification-ggml/NOTICE
@@ -0,0 +1,42 @@
+@qvac/classification-ggml
+Copyright 2026 Tether Operations Limited
+
+This product includes software developed by Tether Operations Limited
+(https://tether.io/).
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may
+not use this file except in compliance with the License. You may obtain
+a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+================================================================================
+
+This product bundles the following third-party components:
+
+--------------------------------------------------------------------------------
+ggml — a tensor library for machine learning
+Copyright (c) 2022-2025 Georgi Gerganov
+Licensed under the MIT License.
+Source: https://github.com/ggerganov/ggml
+
+--------------------------------------------------------------------------------
+stb_image.h / stb_image_resize2.h — Sean Barrett's public-domain image library
+Licensed under the MIT License / Public Domain (dual).
+Source: https://github.com/nothings/stb
+
+--------------------------------------------------------------------------------
+MobileNetV3-Small (Howard et al., 2019) — reference architecture
+Weights shipped in this package are a 3-class fine-tune of the PyTorch /
+torchvision reference model. Original torchvision model and code are
+Copyright (c) Meta Platforms / PyTorch Contributors, BSD-3-Clause licensed.
+Paper: "Searching for MobileNetV3" (arXiv:1905.02244).
+
+The fine-tuned weight file (weights/mobilenetv3_3class_v3_fp16.gguf) is
+released under the Apache-2.0 license, same as this package.
diff --git a/packages/classification-ggml/README.md b/packages/classification-ggml/README.md
new file mode 100644
index 0000000000..60e7fd5ee1
--- /dev/null
+++ b/packages/classification-ggml/README.md
@@ -0,0 +1,229 @@
+# @qvac/classification-ggml
+
+GGML-powered image classification addon for QVAC. Runs a fine-tuned MobileNetV3-Small 3-class triage CNN on the CPU backend of `libggml` and exposes a small, stable JavaScript API. It currently targets one specific image-triage task, but can easily be adapted to other classification tasks.
+
+
+| Property | Value |
+| ------------- | ----------------------------------------------- |
+| Model | MobileNetV3-Small (3 classes) |
+| Parameters | ~2.5 M |
+| Weights | FP16 GGUF, **2.94 MB**, bundled in this package |
+| Input | JPEG, PNG, or raw RGB bytes |
+| Resize target | 224 × 224 (bilinear) |
+| Normalization | ImageNet mean/std |
+| Backend | `libggml` CPU (no GPU dependency) |
+
+
+Package name: `@qvac/classification-ggml`
+Directory: `packages/classification-ggml`
+
+## Install
+
+This addon is published to the `@qvac` scope and consumed like any other QVAC native addon. When used from the monorepo, `npm install` resolves `@qvac/infer-base` and `@qvac/logging` via the workspace.
+
+## Quickstart
+
+```js
+const ImageClassifier = require('@qvac/classification-ggml')
+
+const classifier = new ImageClassifier()
+await classifier.load()
+
+const imageBuffer = fs.readFileSync('./my-image.jpg')
+const result = await classifier.classify(imageBuffer)
+// [ { label: 'food', confidence: 0.93 },
+// { label: 'other', confidence: 0.05 },
+// { label: 'report', confidence: 0.02 } ]
+
+await classifier.unload()
+```
+
+### Raw RGB input
+
+```js
+const result = await classifier.classify(rgbBuffer, {
+ width: 320,
+ height: 240,
+ channels: 3,
+})
+```
+
+### topK filter
+
+By default `classify()` returns one entry per class, sorted from most likely to least likely. Pass `topK: N` to keep only the top `N` results — for example `topK: 1` returns just the single highest-scoring class:
+
+```js
+const best = await classifier.classify(buf, { topK: 1 })
+```
+
+## API
+
+
+| Method | Description |
+| ---------------------------------- | ----------------------------------------------------------------------- |
+| `new ImageClassifier(opts?)` | `opts = { modelPath?, logger?, nativeLogger? }` |
+| `await load()` | Initialises the GGML backend and loads weights. Idempotent. |
+| `await classify(buffer, options?)` | Runs inference. Returns `[{ label, confidence }, …]` sorted descending. |
+| `await unload()` | Releases native resources. Safe to call again. |
+| `await destroy()` | Releases resources and marks the instance as destroyed. |
+| `getState()` | Returns `{ configLoaded, destroyed }`. |
+
+
+See `index.d.ts` for the full TypeScript surface.
+
+### Parameters
+
+#### `new ImageClassifier(opts?)`
+
+All constructor options are optional.
+
+
+| Option | Type | Default | Description |
+| -------------- | ------------------- | ----------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `modelPath` | `string` | Bundled `weights/mobilenetv3_3class_v3_fp16.gguf` | Absolute path to an FP16 GGUF file. Override only when pointing at a custom fine-tune produced by the ONNX→GGUF conversion guide. Also overridable via the `QVAC_CLASSIFICATION_MODEL_PATH` env variable. |
+| `logger` | `QvacLogger`-shaped | `null` | A sink with optional `error / warn / info / debug(msg)` methods (compatible with `@qvac/logging`). Receives JS-side `info` from a successful `load()` and `error` from a failed `load()`. With `nativeLogger: true`, also receives forwarded native `LogMsg` events at `info` level. Always honoured, regardless of `nativeLogger`. |
+| `nativeLogger` | `boolean` | `false` | When `true`, native C++ `QLOG(...)` lines from inside the addon's model-loading and graph code are forwarded to `logger`. Disabled by default because the underlying `qvac-lib-inference-addon-cpp` logger is a process-wide singleton with a static `uv_async_t` that is not safe across rapid create/destroy cycles (e.g. in tests). |
+
+
+#### `await classify(imageInput, options?)`
+
+
+| Parameter | Type | Default | Description |
+| ------------------------- | -------- | ------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `imageInput` *(required)* | `Buffer \| Uint8Array` | — | Encoded JPEG/PNG bytes, or raw RGB bytes when `width`, `height` and `channels` are all supplied. Must be non-empty. |
+| `options.topK` | `number` | `undefined` (all classes) | If set, the returned array is truncated to this many entries (top-K highest confidences). Must be a positive integer. Passing a value ≥ class count is a no-op. |
+| `options.width` | `number` | — | **Required** for raw RGB input. Integer > 0. The underlying buffer must be exactly `width × height × channels` bytes; any mismatch throws a structured error. |
+| `options.height` | `number` | — | **Required** for raw RGB input. Integer > 0. |
+| `options.channels` | `3` | — | **Required** for raw RGB input. Must be exactly `3`. Grayscale and RGBA are not supported — decode or drop the alpha channel on the caller side. |
+
+
+Returns a `Promise` that resolves to an array in which each entry is `{ label: string; confidence: number }`. The array is sorted by `confidence` descending, confidences are softmax probabilities in `[0, 1]` summing to ≈ 1, and `label` comes from the loaded GGUF's `mobilenet.class_N` metadata (so a future fine-tune can introduce new label strings without a code change).
+
+#### `await load()` / `await unload()` / `await destroy()`
+
+None take arguments. `load()` is idempotent — calling it twice is a no-op (check `getState().configLoaded` if you want to verify). `unload()` safely tears down the native handle and may be called multiple times. `destroy()` is equivalent to `unload()` plus a sticky `destroyed` flag in `getState()` — useful if your code wants to refuse reuse of a released instance.
+
+## Output contract
+
+- An array of `{ label: string, confidence: number }`.
+- Sorted by `confidence` descending.
+- `confidence` values are softmax probabilities in `[0, 1]` and sum to ≈ 1.
+- Labels come from the GGUF metadata (`mobilenet.class_0/1/2`). For the bundled weights these are `food`, `report`, `other`.
+
+## Build (from source, monorepo)
+
+Prerequisites: clang (LLVM ≥ 19) with matching `libc++-dev`, vcpkg, bare ≥ 1.24, bare-make. CI pins the exact LLVM major via the shared `setup-llvm` action; locally any recent clang works.
+
+```bash
+cd packages/classification-ggml
+npm install
+bare-make generate
+bare-make build
+bare-make install
+```
+
+One-liner: `npm install && bare-make generate && bare-make build && bare-make install`.
+
+## Testing
+
+```bash
+npm run test:integration # brittle + bare JS integration tests (desktop)
+npm run test:cpp # GoogleTest C++ unit tests
+npm run test:mobile:generate # regenerate test/mobile/integration.auto.cjs
+npm run test:mobile:validate # verify mobile test file structure
+```
+
+Integration tests live in `test/integration/*.test.js` and use the 6 sample images under `test/images/` (two images per class).
+
+### Mobile tests
+
+Mobile tests use the shared `qvac-test-addon-mobile` framework. The `test/mobile/integration.auto.cjs` file is auto-generated by `scripts/generate-mobile-integration-tests.js` from every `*.test.js` under `test/integration/`, so adding a new integration test automatically exposes it on mobile too.
+
+Before the mobile harness can be built, run
+
+```bash
+npm run mobile:copy-prebuilds
+```
+
+to populate `test/mobile/testAssets/` (driven by `scripts/copy-mobile-test-assets.js`). The script (a) fans out the single arm64 prebuild into the per-flavour directories the framework expects under `prebuilds/`, (b) copies the FP16 GGUF weights with a `.gguf.bin` suffix so the React Native bundler treats them as a binary asset, and (c) copies every `test/images/*.{jpg,jpeg,png}` into `testAssets/` so the integration tests can resolve them via `global.assetPaths` on-device. None of these copied files are checked into git. See [test/mobile/README.md](test/mobile/README.md) for the lifecycle note about the shared native logger.
+
+## Platform support
+
+
+| Platform | CPU | Notes |
+| ------------------- | --- | ---------------- |
+| Linux x64 | ✅ | |
+| Linux arm64 | ✅ | |
+| macOS arm64 (Apple) | ✅ | |
+| macOS x64 (Intel) | ✅ | |
+| Windows x64 | ✅ | |
+| Android arm64 | ✅ | `c++_shared` STL |
+| iOS arm64 | ✅ | |
+
+
+All platforms are produced by the shared `reusable-prebuilds.yml`
+matrix and merged into a single `prebuilds` artifact for downstream
+consumption. GPU (Vulkan / Metal / CUDA) is not currently supported.
+
+## Performance
+
+Depending on the platform, one call to `classifier.classify(buffer)` takes from a few tens to a couple of hundred milliseconds.
+
+### What affects `classify()` latency
+
+- **CPU thread pool** — libggml sizes its internal CPU worker pool to `std::thread::hardware_concurrency` on every platform. The addon does not expose a tuning knob for this; if a future need arises, raise an issue and we can add one.
+- **Input size** — the JPEG/PNG decode and the `stb_image_resize2` bilinear pass scale with source pixel count. The 224×224 tensor pass is fixed-cost; a 12 MP phone photo adds real overhead vs. a 640×480 webcam frame.
+- **First-call overhead** — `load()` already runs a full-pipeline warmup (synthetic-pattern pass through preprocess + GGML compute + output read) before returning, so the GGML compute buffers, weight buffer, and worker thread are fully materialised when the first `classify()` is dispatched. Even so, the first user-supplied call is typically a few tens of milliseconds slower than the steady-state average.
+- **Re-use** — `load()` once, `classify()` many times. Tearing down and rebuilding the model for each image is roughly 4–6× slower end-to-end and is never necessary outside of tests.
+
+### Memory footprint
+
+
+| Component | Size |
+| ---------------------------------------------------------- | --------------- |
+| Bundled FP16 weights (mmapped) | 2.94 MB |
+| Backend weight buffer (FP16 + folded BN + FP32 classifier) | ≈ 5.5 MB |
+| Intermediate activations (compute buffer) | single-digit MB |
+| **Total resident** during inference | **~8–10 MB** |
+
+
+All GGML compute buffers (input tensor, intermediate activations, output) are allocated **once** at `load()` time and reused on every `classify()` call — `ggml_backend_tensor_set` / `_get` are the only operations that touch them per request. Per-call C++ allocations are bounded: one input-buffer copy across the bare-runtime boundary, the decoded RGB buffer, the resized 224×224 RGB buffer, the WHCN F32 tensor, and the 3-element softmax + result vectors. Multiple `ImageClassifier` instances each keep their own compute buffer and worker thread — you pay the ~8 MB once per instance.
+
+### Why FP16 weights?
+
+FP16 was chosen because it matches FP32 top-1 accuracy on the internal validation set while halving the on-disk footprint (≈3 MB vs ≈6 MB) and giving a measurable inference speed-up on every CPU backend we ship. More aggressive quantizations (Q8_0, Q4_K and below) were evaluated on the same validation set and showed noticeable accuracy degradation, which for a 3-class triage model is not acceptable. If you fine-tune your own MobileNetV3-Small, keep FP16 as the publish format unless you re-run the full validation suite at the lower precision.
+
+### Measuring locally
+
+The integration suite hooks the shared `scripts/test-utils/performance-reporter.js` via `test/integration/utils.js`. Running
+
+```bash
+npm run test:integration
+```
+
+writes `test/results/performance-report.json` with one `total_time_ms` entry per sample image, and in GitHub Actions also emits a Markdown step summary.
+
+## Architecture
+
+See [docs/architecture.md](docs/architecture.md) for the MobileNetV3-Small layer breakdown and graph construction notes, and [docs/data-flow.md](docs/data-flow.md) for the end-to-end request flow.
+
+### Why a custom GGML graph?
+
+`llama-cpp` doesn't support CNN architectures, so this addon bypasses `llama.cpp` entirely and talks to the stable `ggml_*` / `ggml_backend_*` public API.
+
+For this MobileNetV3-Small the GGML CPU backend is, in most configurations, slower per call than the same network running on a mature PyTorch or ONNX Runtime build with their hand-tuned convolution kernels. Because the model is very small (≈2.5 M params, single-digit-millisecond compute on a modern phone), the absolute gap is negligible for a triage workload and is dominated by image decode and JS↔native marshalling. If a substantially larger classifier is ever added on top of this same scaffolding, expect to invest extra effort in graph-level optimisations (operator fusion, matmul tiling, FP16 SIMD kernels, threadpool sizing) before the GGML path is competitive.
+
+## Converting a new model
+
+If you fine-tune or swap the underlying MobileNetV3 model, follow [docs/onnx-to-gguf-conversion.md](docs/onnx-to-gguf-conversion.md). The graph construction is parameterised by `kBlocks` in `MobileNetGraph.hpp` — only classes and weights change between fine-tunes.
+
+## Troubleshooting
+
+- **“MobileNet GGUF weights not found”**: the default path is `weights/mobilenetv3_3class_v3_fp16.gguf` inside the installed package. Override with `new ImageClassifier({ modelPath: '/abs/path.gguf' })` or set the `QVAC_CLASSIFICATION_MODEL_PATH` env variable.
+- **All predictions look wrong**: verify the BN epsilon is still `0.001` (see the guarded unit test) — the architecture is unusually sensitive to this constant.
+- **Build fails looking for `stb_image.h`**: make sure the `stb` vcpkg port is installed. The `vcpkg-configuration.json` pins it.
+- **Mobile build fails looking for `libggml-cpu`**: the prebuild workflow copies all `ggml::${_backend}` targets into `prebuilds/`. Re-run `bare-make install`.
+
+## License
+
+Apache-2.0. See [LICENSE](LICENSE) and [NOTICE](NOTICE).
\ No newline at end of file
diff --git a/packages/classification-ggml/addon.js b/packages/classification-ggml/addon.js
new file mode 100644
index 0000000000..98dd91e160
--- /dev/null
+++ b/packages/classification-ggml/addon.js
@@ -0,0 +1,109 @@
+'use strict'
+
+// Native JsLogger is a process-wide singleton (static uv_async_t in
+// addon-cpp); install its JS callback once, switch sinks per instance.
+let _loggerInstalled = false
+let _activeLoggerSink = null
+
+function _ensureLoggerInstalled (binding) {
+ if (_loggerInstalled) return
+ const levels = ['error', 'warn', 'info', 'debug']
+ binding.setLogger((priority, message) => {
+ const sink = _activeLoggerSink
+ if (!sink) return
+ const level = levels[priority] || 'info'
+ if (typeof sink[level] === 'function') {
+ try { sink[level](message) } catch (_) {}
+ }
+ })
+ _loggerInstalled = true
+}
+
+function _setActiveLoggerSink (sink) { _activeLoggerSink = sink }
+function _clearActiveLoggerSink (sink) {
+ if (_activeLoggerSink === sink) _activeLoggerSink = null
+}
+
+/**
+ * Normalize a raw native event to `Output` / `Error` / `LogMsg` /
+ * `JobEnded`, or `null` to drop. Keyed on payload shape because the
+ * upstream JobRunner emits the stats trailer with a raw RTTI event
+ * name (no `JobEnded` substring), so an array โ `Output` and a plain
+ * object โ terminal `JobEnded`.
+ */
+function mapAddonEvent (rawEvent, rawData, rawError) {
+ if (typeof rawEvent === 'string') {
+ if (rawEvent.includes('Error')) {
+ return { type: 'Error', data: rawData, error: rawError }
+ }
+ if (rawEvent.includes('LogMsg')) {
+ return { type: 'LogMsg', data: rawData, error: null }
+ }
+ if (rawEvent.includes('JobEnded')) {
+ return { type: 'JobEnded', data: rawData, error: null }
+ }
+ if (rawEvent.includes('JobStarted')) {
+ return null
+ }
+ }
+ if (Array.isArray(rawData)) {
+ return { type: 'Output', data: rawData, error: null }
+ }
+ if (rawData && typeof rawData === 'object') {
+ return { type: 'JobEnded', data: rawData, error: null }
+ }
+ return { type: rawEvent, data: rawData, error: rawError }
+}
+
+/**
+ * Thin JSโnative bridge owning one bare C++ instance handle. Lifecycle
+ * lives in `index.js`, mirroring `LlamaInterface` / `LlmLlamacpp`.
+ *
+ * `opts.disableNativeLogger` controls whether the native LogMsg bridge is
+ * armed for this instance; kept on a sibling arg so `configurationParams`
+ * stays 1:1 with the C++ schema (no JS-only `__`-prefixed flags).
+ */
+class ClassificationInterface {
+ constructor (binding, configurationParams, outputCb, logger = null, opts = {}) {
+ this._binding = binding
+ this._handle = null
+ this._logger = logger
+
+ if (logger && typeof logger === 'object' && !opts.disableNativeLogger) {
+ _ensureLoggerInstalled(binding)
+ _setActiveLoggerSink(logger)
+ }
+
+ this._handle = this._binding.createInstance(this, configurationParams, outputCb)
+ }
+
+ async activate () {
+ if (!this._handle) throw new Error('Classification addon is not initialized')
+ this._binding.activate(this._handle)
+ }
+
+ async runJob (input) {
+ if (!this._handle) throw new Error('Classification addon is not initialized')
+ return this._binding.runJob(this._handle, input)
+ }
+
+ async cancel () {
+ if (!this._handle) return
+ await this._binding.cancel(this._handle)
+ }
+
+ async unload () {
+ if (this._handle === null) return
+ if (this._logger) _clearActiveLoggerSink(this._logger)
+ try {
+ this._binding.destroyInstance(this._handle)
+ } finally {
+ this._handle = null
+ }
+ }
+}
+
+module.exports = {
+  ClassificationInterface, // per-instance bridge to the native binding
+  mapAddonEvent // raw native event -> normalised { type, data, error }
+}
diff --git a/packages/classification-ggml/addon/src/addon/AddonJs.hpp b/packages/classification-ggml/addon/src/addon/AddonJs.hpp
new file mode 100644
index 0000000000..753d469221
--- /dev/null
+++ b/packages/classification-ggml/addon/src/addon/AddonJs.hpp
@@ -0,0 +1,213 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "model-interface/ClassificationModel.hpp"
+
+namespace classification_ggml::bindings {
+
+namespace addon_cpp = qvac_lib_inference_addon_cpp;
+namespace jsu = qvac_lib_inference_addon_cpp::js;
+
+using qvac_errors::StatusError;
+using qvac_errors::general_error::InvalidArgument;
+
+/// Marshals `ClassifyOutput::results` into a JS array of `{ label, confidence }`; `QVAC_CLASSIFICATION_TRACE=1` also dumps each marshalled entry to stderr.
+struct JsClassifyOutputHandler
+ : addon_cpp::out_handl::JsBaseOutputHandler {
+ JsClassifyOutputHandler()
+ : JsBaseOutputHandler(
+ [this](const ClassifyOutput& cppOut) -> js_value_t* {
+ auto array = jsu::Array::create(this->env_);
+ const bool trace = []() { // env var is re-read on every marshalled output
+ const char* v = std::getenv("QVAC_CLASSIFICATION_TRACE");
+ return v != nullptr && v[0] == '1'; // any value starting with '1' enables tracing
+ }();
+
+ for (size_t i = 0; i < cppOut.results.size(); ++i) {
+ const std::string& label = cppOut.results[i].label;
+ const double confidence =
+ static_cast(cppOut.results[i].confidence);
+
+ if (trace) {
+ std::fprintf(
+ stderr,
+ "[qvac-classify-marshal] i=%zu label='%s' "
+ "confidence=%.9f\n",
+ i,
+ label.c_str(),
+ confidence);
+ std::fflush(stderr);
+ }
+
+ auto entry = jsu::Object::create(this->env_);
+ entry.setProperty(
+ this->env_,
+ "label",
+ jsu::String::create(this->env_, label));
+ entry.setProperty(
+ this->env_,
+ "confidence",
+ jsu::Number::create(this->env_, confidence));
+ array.set(this->env_, i, entry); // results keep the order of cppOut.results
+ }
+ return array;
+ }) {}
+};
+
+inline js_value_t* createInstance(
+ js_env_t* env, js_callback_info_t* info) try {
+ addon_cpp::JsArgsParser args(env, info); // JS args: (jsHandle, config, outputCallback)
+
+ auto configObj = args.getJsObject(1, "config"); // reads `path` and optional `config.backendsDir`
+ auto modelPath =
+ configObj.getProperty(env, "path").as(env);
+ if (modelPath.empty()) {
+ throw StatusError(
+ InvalidArgument,
+ "Configuration 'path' is required and must be a non-empty string "
+ "pointing at the FP16 GGUF weights file");
+ }
+
+ auto model = std::make_unique(modelPath);
+
+ auto innerConfig =
+ configObj.getOptionalProperty(env, "config");
+ if (innerConfig.has_value()) {
+ auto backendsDirOpt =
+ innerConfig->getOptionalProperty(env, "backendsDir");
+ if (backendsDirOpt.has_value()) {
+ model->setBackendsDir(backendsDirOpt->as(env)); // must be set before load() below
+ }
+ }
+
+ model->load(); // runs here, before the addon instance wrapping the model is created
+
+ addon_cpp::out_handl::OutputHandlers
+ outHandlers;
+ outHandlers.add(std::make_shared());
+
+ auto callback = std::make_unique(
+ env, args.get(0, "jsHandle"), args.getFunction(2, "outputCallback"),
+ std::move(outHandlers));
+
+ auto addon = std::make_unique(
+ env, std::move(callback),
+ std::unique_ptr(std::move(model))); // addon takes ownership of the loaded model
+
+ return addon_cpp::JsInterface::createInstance(env, std::move(addon));
+}
+JSCATCH
+
+inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
+ addon_cpp::JsArgsParser args(env, info); // JS args: (instance, inputObj)
+ addon_cpp::AddonJs& instance =
+ addon_cpp::JsInterface::getInstance(env, args.get(0, "instance"));
+
+ auto inputObj = args.getJsObject(1, "inputObj"); // reads type, content, width/height/channels, topK
+ auto type =
+ inputObj.getProperty(env, "type").as(env);
+ if (type != "image") {
+ throw StatusError(
+ InvalidArgument,
+ "Classification addon accepts only 'image' input type, got '" + type +
+ "'");
+ }
+
+ ClassifyInput cppInput;
+
+ // Error wording is a test contract: integration suite asserts on the
+ // substrings "required" / "null" / "undefined" for the null-input case.
+ auto bufferVal = inputObj.getProperty(env, "content");
+ if (!jsu::is>(env, bufferVal)) {
+ throw StatusError(
+ InvalidArgument,
+ "Image 'content' is required and must be a Uint8Array / Buffer of "
+ "encoded JPEG/PNG bytes or raw RGB bytes (got null, undefined, or "
+ "wrong type)");
+ }
+ auto ta = jsu::TypedArray(env, bufferVal);
+ auto span = ta.as>(env);
+ if (span.empty()) {
+ throw StatusError(InvalidArgument, "Image 'content' buffer is empty");
+ }
+ cppInput.data.assign(span.begin(), span.end()); // copies the bytes out of the JS buffer
+
+ // {width, height, channels} are an all-or-nothing trio: zero present
+ // means encoded JPEG/PNG, three present means raw RGB.
+ auto widthOpt = inputObj.getOptionalProperty(env, "width");
+ auto heightOpt = inputObj.getOptionalProperty(env, "height");
+ auto channelsOpt =
+ inputObj.getOptionalProperty(env, "channels");
+ const int provided = (widthOpt.has_value() ? 1 : 0) +
+ (heightOpt.has_value() ? 1 : 0) +
+ (channelsOpt.has_value() ? 1 : 0);
+ if (provided != 0 && provided != 3) {
+ throw StatusError(
+ InvalidArgument,
+ "Raw RGB input requires all of 'width', 'height', and 'channels' "
+ "to be provided together; received " + std::to_string(provided) +
+ " of 3");
+ }
+ if (provided == 3) {
+ // bare-runtime's unsigned `as` conversion static_casts negatives to ~4
+ // billion; read each value as int32_t first to range-check meaningfully.
+ const int32_t w = widthOpt->as(env);
+ const int32_t h = heightOpt->as(env);
+ const int32_t c = channelsOpt->as(env);
+ if (w <= 0) {
+ throw StatusError(
+ InvalidArgument,
+ "Image 'width' must be a positive integer when passing raw RGB "
+ "bytes; got " + std::to_string(w));
+ }
+ if (h <= 0) {
+ throw StatusError(
+ InvalidArgument,
+ "Image 'height' must be a positive integer when passing raw RGB "
+ "bytes; got " + std::to_string(h));
+ }
+ if (c != 3) {
+ throw StatusError(
+ InvalidArgument,
+ "Image 'channels' must be exactly 3 (RGB) when passing raw RGB "
+ "bytes; got " + std::to_string(c));
+ }
+ cppInput.rawRgb = RawRgbDims{
+ static_cast(w), static_cast(h),
+ static_cast(c)};
+ }
+
+ auto topKOpt = inputObj.getOptionalProperty(env, "topK");
+ if (topKOpt.has_value()) {
+ const int32_t topK = topKOpt->as(env);
+ if (topK <= 0) {
+ throw StatusError(
+ InvalidArgument,
+ "Image 'topK' must be a positive integer when provided; got " +
+ std::to_string(topK));
+ }
+ cppInput.topK = static_cast(topK);
+ }
+
+ return instance.runJob(std::any(std::move(cppInput))); // input is handed over type-erased
+}
+JSCATCH
+
+} // namespace classification_ggml::bindings
diff --git a/packages/classification-ggml/addon/src/js-interface/binding.cpp b/packages/classification-ggml/addon/src/js-interface/binding.cpp
new file mode 100644
index 0000000000..875f816043
--- /dev/null
+++ b/packages/classification-ggml/addon/src/js-interface/binding.cpp
@@ -0,0 +1,38 @@
+#include
+
+#include "addon/AddonJs.hpp"
+
+js_value_t* classification_ggml_exports(
+ js_env_t* env,
+ js_value_t* exports) { // NOLINT(readability-identifier-naming)
+
+// NOLINTBEGIN(cppcoreguidelines-macro-usage)
+#define V(name, fn) \
+ { \
+ js_value_t* val; \
+ if (js_create_function(env, name, -1, fn, nullptr, &val) != 0) { \
+ return nullptr; \
+ } \
+ if (js_set_named_property(env, exports, name, val) != 0) { \
+ return nullptr; \
+ } \
+ }
+
+ V("createInstance", classification_ggml::bindings::createInstance) // addon-specific entry points
+ V("runJob", classification_ggml::bindings::runJob)
+
+ V("loadWeights", qvac_lib_inference_addon_cpp::JsInterface::loadWeights) // shared addon-cpp entry points
+ V("activate", qvac_lib_inference_addon_cpp::JsInterface::activate)
+ V("cancel", qvac_lib_inference_addon_cpp::JsInterface::cancel)
+ V("destroyInstance",
+ qvac_lib_inference_addon_cpp::JsInterface::destroyInstance)
+ V("setLogger", qvac_lib_inference_addon_cpp::JsInterface::setLogger)
+ V("releaseLogger", qvac_lib_inference_addon_cpp::JsInterface::releaseLogger)
+
+#undef V
+ // NOLINTEND(cppcoreguidelines-macro-usage)
+
+ return exports; // nullptr is returned earlier if any registration fails
+}
+
+BARE_MODULE(classification_ggml, classification_ggml_exports)
diff --git a/packages/classification-ggml/addon/src/model-interface/ClassificationModel.cpp b/packages/classification-ggml/addon/src/model-interface/ClassificationModel.cpp
new file mode 100644
index 0000000000..72677e3d75
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/ClassificationModel.cpp
@@ -0,0 +1,325 @@
+#include "ClassificationModel.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if defined(__ANDROID__)
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ImagePreprocessor.hpp"
+#include "MobileNetGraph.hpp"
+
+namespace classification_ggml {
+
+using qvac_errors::StatusError;
+using qvac_errors::general_error::InternalError;
+using qvac_errors::general_error::InvalidArgument;
+
+namespace {
+constexpr const char* kModelName = "mobilenetv3-small-ggml-classification";
+} // namespace
+
+ClassificationModel::ClassificationModel(std::string modelPath)
+ : modelPath_(std::move(modelPath)) {}
+
+/// Releases the compute graph and weights, then the ggml backend (if any).
+ClassificationModel::~ClassificationModel() {
+  // Order matters: ggml needs every buffer freed before the backend it was
+  // allocated on, so both bundles are reset ahead of ggml_backend_free.
+  compute_.reset();
+  weights_.reset();
+  if (backend_ == nullptr) {
+    return;
+  }
+  ggml_backend_free(backend_);
+  backend_ = nullptr;
+}
+
+std::string ClassificationModel::getName() const {
+ return kModelName;
+}
+
+/// Reports the duration stored by the most recent process() call as a single
+/// "total_time_ms" entry: lastInferenceUs_ divided by 1000 (the field name
+/// suggests microseconds — the duration_cast unit is set in process()).
+/// NOTE(review): reads lastInferenceUs_ without taking mutex_, whereas
+/// process() writes it while holding the lock — confirm callers serialise
+/// these two calls.
+qvac_lib_inference_addon_cpp::RuntimeStats
+ClassificationModel::runtimeStats() const {
+ using qvac_lib_inference_addon_cpp::RuntimeStats;
+ RuntimeStats stats;
+ const double totalMs = static_cast(lastInferenceUs_) / 1000.0;
+ stats.emplace_back("total_time_ms", totalMs);
+ return stats;
+}
+
+void ClassificationModel::setBackendsDir(std::string backendsDir) {
+ std::scoped_lock lock(mutex_);
+ backendsDir_ = std::move(backendsDir);
+}
+
+namespace {
+
+/// Numerically stable softmax. Falls back to a uniform distribution if
+/// every logit is non-finite or the exp sum overflows, so downstream
+/// code always sees a probability vector that sums to 1.
+///
+/// The largest finite logit is subtracted before exponentiation. Entries
+/// whose shifted value is not finite (NaN/Inf inputs) contribute 0 and so
+/// receive probability 0 on the normal path. An empty span returns an empty
+/// vector.
+std::vector softmax(std::span logits) {
+ if (logits.empty()) {
+ return {};
+ }
+
+ // std::max_element on a span containing NaN is unspecified.
+ // Manual scan instead: only finite logits can become the maximum.
+ float maxLogit = -std::numeric_limits::infinity();
+ for (const float logit : logits) {
+ if (std::isfinite(logit) && logit > maxLogit) {
+ maxLogit = logit;
+ }
+ }
+ // maxLogit still at -infinity means no finite logit was seen.
+ if (!std::isfinite(maxLogit)) {
+ const float uniform = 1.0F / static_cast(logits.size());
+ return std::vector(logits.size(), uniform);
+ }
+
+ // Exponentiate the shifted logits; the maximum itself yields exp(0) = 1.
+ std::vector probs(logits.size());
+ float sum = 0.0F;
+ for (size_t i = 0; i < logits.size(); ++i) {
+ const float diff = logits[i] - maxLogit;
+ const float e = std::isfinite(diff) ? std::exp(diff) : 0.0F;
+ probs[i] = e;
+ sum += e;
+ }
+
+ // Normalise by the sum, or overwrite with uniform if the sum is unusable.
+ if (std::isfinite(sum) && sum > 0.0F) {
+ const float inv = 1.0F / sum;
+ for (float& p : probs) {
+ p *= inv;
+ }
+ } else {
+ const float uniform = 1.0F / static_cast(logits.size());
+ std::fill(probs.begin(), probs.end(), uniform);
+ }
+ return probs;
+}
+
+/// True when the QVAC_CLASSIFICATION_TRACE environment variable is set and
+/// its first character is '1'. Queried on every call (no caching).
+bool traceEnabled() {
+  const char* flag = std::getenv("QVAC_CLASSIFICATION_TRACE");
+  if (flag == nullptr) {
+    return false;
+  }
+  return flag[0] == '1';
+}
+
+} // namespace
+
+/// Initialises the ggml CPU backend, loads the GGUF weights and labels,
+/// builds the compute graph and runs one warm-up inference.
+///
+/// Holds mutex_ for the whole call and returns immediately if the model is
+/// already loaded. Throws StatusError with InvalidArgument when modelPath_
+/// (or, on Android, backendsDir_) is empty, and with InternalError when no
+/// CPU backend can be initialised. Errors thrown by graph::loadWeights,
+/// graph::buildGraph or the preprocessor propagate unchanged.
+void ClassificationModel::load() {
+  std::scoped_lock lock(mutex_);
+  if (loaded_) {
+    return;
+  }
+  if (modelPath_.empty()) {
+    throw StatusError(
+        InvalidArgument,
+        "ClassificationModel requires a path to mobilenetv3 FP16 GGUF weights");
+  }
+
+  // An earlier load() may have thrown after the backend was created, leaving
+  // loaded_ false but resources allocated. Release that partial state
+  // (buffers strictly before the backend) so a retry does not leak it.
+  compute_.reset();
+  weights_.reset();
+  if (backend_ != nullptr) {
+    ggml_backend_free(backend_);
+    backend_ = nullptr;
+  }
+
+#if defined(__ANDROID__)
+  // qvac-fabric on Android ships per-microarch CPU variants as MODULE
+  // .so files loaded at runtime via dlopen. ggml_backend_cpu_init() is
+  // not statically linkable here (symbol lives inside the variant .so),
+  // so we open the variants from backendsDir_ / BACKENDS_SUBDIR and
+  // pick a CPU device through the generic registry API.
+  //
+  // backendsDir comes from JS (`path.join(__dirname, 'prebuilds')`,
+  // mirroring the llamacpp-llm addon) and BACKENDS_SUBDIR is a
+  // compile-time relative path.
+  if (backendsDir_.empty()) {
+    throw StatusError(
+        InvalidArgument,
+        "Configuration 'config.backendsDir' is required on Android");
+  }
+  std::filesystem::path variantsDir =
+      std::filesystem::path(backendsDir_) / BACKENDS_SUBDIR;
+  ggml_backend_load_all_from_path(variantsDir.string().c_str());
+
+  ggml_backend_dev_t cpuDev =
+      ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+  if (cpuDev == nullptr) {
+    throw StatusError(
+        InternalError,
+        "No CPU backend device registered after loading variants from " +
+            variantsDir.string());
+  }
+  backend_ = ggml_backend_dev_init(cpuDev, /*params=*/nullptr);
+#else
+  backend_ = ggml_backend_cpu_init();
+#endif
+  if (backend_ == nullptr) {
+    throw StatusError(InternalError, "Failed to initialize ggml CPU backend");
+  }
+
+  // Labels come from the GGUF metadata; fall back to the built-in names when
+  // the file does not provide them.
+  labels_.clear();
+  weights_ = graph::loadWeights(modelPath_, backend_, labels_);
+  if (labels_.empty()) {
+    labels_ = {"food", "report", "other"};
+  }
+  compute_ = graph::buildGraph(weights_, backend_);
+
+  // One full forward pass at load() time. Without it, the first
+  // user-visible classify() can return NaN logits on win32-x64 CI
+  // because some backend allocator buffers are uninitialised after
+  // buildGraph() and CPU backends can JIT SIMD kernels on cold input.
+  // Symmetric with process(): set, compute, read back, discard.
+  {
+    constexpr uint32_t kWarmupSide = 32;
+    std::vector<uint8_t> warmupRgb(
+        static_cast<size_t>(kWarmupSide) * kWarmupSide * preprocess::kChannels);
+    // Deterministic byte pattern; the values themselves are irrelevant.
+    for (size_t i = 0; i < warmupRgb.size(); ++i) {
+      warmupRgb[i] = static_cast<uint8_t>((i * 7) & 0xFFU);
+    }
+    std::vector<float> warmupTensor = preprocess::preprocessToTensor(
+        std::span<const uint8_t>(warmupRgb.data(), warmupRgb.size()),
+        kWarmupSide, kWarmupSide, preprocess::kChannels);
+    ggml_backend_tensor_set(
+        compute_.input, warmupTensor.data(), 0,
+        warmupTensor.size() * sizeof(float));
+    // The warm-up result, including its status, is intentionally discarded.
+    (void)ggml_backend_graph_compute(backend_, compute_.graph);
+    float warmupLogits[graph::kNumClasses] = {0.0F};
+    ggml_backend_tensor_get(
+        compute_.output, warmupLogits, 0, sizeof(warmupLogits));
+    (void)warmupLogits;
+  }
+
+  loaded_ = true;
+
+  QLOG(
+      qvac_lib_inference_addon_cpp::logger::Priority::INFO,
+      std::string("ClassificationModel loaded (") +
+          std::to_string(labels_.size()) + " classes)");
+}
+
+/// Classifies one image and returns the ranked results.
+///
+/// `input` is any_cast to the classify-input struct (its `data`, `rawRgb` and
+/// `topK` members are read below). The returned std::any holds a
+/// ClassifyOutput whose results are sorted by confidence, highest first, and
+/// truncated to `topK` entries when 0 < topK < number of results.
+///
+/// Holds mutex_ for the whole call and stores the elapsed time in
+/// lastInferenceUs_ on success. Throws StatusError with InvalidArgument when
+/// the any_cast fails, and with InternalError when the model is not loaded,
+/// the preprocessed tensor has the wrong size, or graph compute fails.
+/// Errors raised by preprocess::preprocessToTensor propagate unchanged.
+std::any ClassificationModel::process(const std::any& input) {
+ std::scoped_lock lock(mutex_);
+
+ const auto* inPtr = std::any_cast(&input);
+ if (inPtr == nullptr) {
+ throw StatusError(InvalidArgument, "ClassificationModel: invalid input type");
+ }
+ if (!loaded_) {
+ throw StatusError(
+ InternalError,
+ "ClassificationModel: classify() called before load() or after unload()");
+ }
+
+ const auto t0 = std::chrono::steady_clock::now();
+
+ // The preprocessor's legacy encoded-path sentinel is `uint32_t == 0`;
+ // collapse the optional to that triplet at this boundary.
+ const uint32_t rawW = inPtr->rawRgb.has_value() ? inPtr->rawRgb->width : 0;
+ const uint32_t rawH = inPtr->rawRgb.has_value() ? inPtr->rawRgb->height : 0;
+ const uint32_t rawC =
+ inPtr->rawRgb.has_value() ? inPtr->rawRgb->channels : 0;
+ std::vector inputTensor = preprocess::preprocessToTensor(
+ std::span(inPtr->data.data(), inPtr->data.size()),
+ rawW, rawH, rawC);
+
+ // Guard the upload below: the byte count passed to tensor_set is derived
+ // from inputTensor.size().
+ const size_t expected = static_cast(preprocess::kInputSize) *
+ preprocess::kInputSize * preprocess::kChannels;
+ if (inputTensor.size() != expected) {
+ throw StatusError(
+ InternalError, "ClassificationModel: preprocessed tensor has wrong size");
+ }
+
+ ggml_backend_tensor_set(
+ compute_.input, inputTensor.data(), 0,
+ inputTensor.size() * sizeof(float));
+
+ ggml_status status =
+ ggml_backend_graph_compute(backend_, compute_.graph);
+ if (status != GGML_STATUS_SUCCESS) {
+ throw StatusError(
+ InternalError, "ggml_backend_graph_compute failed with status " +
+ std::to_string(static_cast(status)));
+ }
+
+ float logits[graph::kNumClasses] = {0.0F};
+ ggml_backend_tensor_get(
+ compute_.output, logits, 0, sizeof(logits));
+
+ std::vector probs = softmax(std::span(logits, graph::kNumClasses));
+
+ // Pair each probability with its label; indices beyond labels_ get a
+ // synthetic "class_<i>" name.
+ ClassifyOutput output;
+ output.results.reserve(probs.size());
+ for (size_t i = 0; i < probs.size(); ++i) {
+ const std::string label = i < labels_.size()
+ ? labels_[i]
+ : std::string("class_") + std::to_string(i);
+ output.results.push_back({label, probs[i]});
+ }
+
+ // Treat NaN/Inf as smaller than any finite value so the ordering
+ // stays strict-weak even if a future ggml regression slips a
+ // non-finite past the defensive softmax above.
+ std::sort(
+ output.results.begin(),
+ output.results.end(),
+ [](const ClassifyResult& a, const ClassifyResult& b) {
+ const bool aFinite = std::isfinite(a.confidence);
+ const bool bFinite = std::isfinite(b.confidence);
+ if (aFinite != bFinite) {
+ return aFinite;
+ }
+ if (!aFinite && !bFinite) {
+ return false;
+ }
+ return a.confidence > b.confidence;
+ });
+
+ // Optional stderr dump, emitted before the topK truncation below.
+ // NOTE(review): logits[0..2] and probs[0..2] are indexed unconditionally,
+ // which relies on graph::kNumClasses being at least 3 — confirm.
+ if (traceEnabled()) {
+ std::fprintf(
+ stderr,
+ "[qvac-classify] logits=[%.6f, %.6f, %.6f] "
+ "probs_before_sort=[%.6f, %.6f, %.6f] "
+ "sorted=[{%s:%.6f}, {%s:%.6f}, {%s:%.6f}]\n",
+ static_cast(logits[0]),
+ static_cast(logits[1]),
+ static_cast(logits[2]),
+ static_cast(probs[0]),
+ static_cast(probs[1]),
+ static_cast(probs[2]),
+ output.results.size() > 0 ? output.results[0].label.c_str() : "-",
+ output.results.size() > 0
+ ? static_cast(output.results[0].confidence)
+ : 0.0,
+ output.results.size() > 1 ? output.results[1].label.c_str() : "-",
+ output.results.size() > 1
+ ? static_cast(output.results[1].confidence)
+ : 0.0,
+ output.results.size() > 2 ? output.results[2].label.c_str() : "-",
+ output.results.size() > 2
+ ? static_cast(output.results[2].confidence)
+ : 0.0);
+ std::fflush(stderr);
+ }
+
+ // topK == 0 means "no filter"; a topK at or above the result count is a
+ // no-op as well.
+ if (inPtr->topK > 0 && inPtr->topK < output.results.size()) {
+ output.results.resize(inPtr->topK);
+ }
+
+ // Timing covers preprocessing, inference and post-processing.
+ const auto t1 = std::chrono::steady_clock::now();
+ lastInferenceUs_ = static_cast(
+ std::chrono::duration_cast(t1 - t0).count());
+
+ return std::any(std::move(output));
+}
+
+} // namespace classification_ggml
+
diff --git a/packages/classification-ggml/addon/src/model-interface/ClassificationModel.hpp b/packages/classification-ggml/addon/src/model-interface/ClassificationModel.hpp
new file mode 100644
index 0000000000..0c1004f480
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/ClassificationModel.hpp
@@ -0,0 +1,80 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "MobileNetGraph.hpp"
+
+namespace classification_ggml {
+
+struct RawRgbDims {
+ uint32_t width;
+ uint32_t height;
+ uint32_t channels;
+};
+
+/// Raw classify input. `rawRgb` present = caller-supplied RGB bytes;
+/// absent = encoded JPEG/PNG, dimensions come from the decoder.
+struct ClassifyInput {
+ std::vector data;
+ std::optional rawRgb;
+ uint32_t topK = 0; // 0 = no topK filter
+};
+
+struct ClassifyResult {
+ std::string label;
+ float confidence;
+};
+
+/// Sorted by confidence descending.
+struct ClassifyOutput {
+ std::vector results;
+};
+
+/// MobileNetV3-Small 3-class classifier on libggml's CPU backend.
+class ClassificationModel
+ : public qvac_lib_inference_addon_cpp::model::IModel {
+public:
+ explicit ClassificationModel(std::string modelPath);
+ ~ClassificationModel() override;
+
+ ClassificationModel(const ClassificationModel&) = delete;
+ ClassificationModel& operator=(const ClassificationModel&) = delete;
+
+ [[nodiscard]] std::string getName() const override;
+ std::any process(const std::any& input) override;
+ [[nodiscard]] qvac_lib_inference_addon_cpp::RuntimeStats
+ runtimeStats() const override;
+
+ /// Called from createInstance so load failures surface synchronously.
+ void load();
+
+ /// Optional addon-prebuilds root (e.g. `<addon dir>/prebuilds`). On Android
+ /// it's combined with the BACKENDS_SUBDIR compile-time relative path to
+ /// locate the per-microarch CPU variant .so files for ggml's runtime
+ /// backend loader. No-op on platforms where the CPU backend is static.
+ void setBackendsDir(std::string backendsDir);
+
+private:
+ std::string modelPath_;
+ std::string backendsDir_;
+ ggml_backend_t backend_ = nullptr;
+ graph::WeightsBundle weights_;
+ graph::ComputeGraph compute_;
+ std::vector labels_;
+ bool loaded_ = false;
+ uint64_t lastInferenceUs_ = 0;
+ mutable std::mutex mutex_;
+};
+
+} // namespace classification_ggml
diff --git a/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.cpp b/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.cpp
new file mode 100644
index 0000000000..a8c9452946
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.cpp
@@ -0,0 +1,231 @@
+#include "ImagePreprocessor.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+// stb single-header implementations live here for the whole addon.
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
+#include
+#include
+
+namespace classification_ggml::preprocess {
+
+namespace {
+using qvac_errors::general_error::InvalidArgument;
+using qvac_errors::StatusError;
+
+constexpr size_t kDecodedChannels = 3;
+
+[[noreturn]] void raise(const std::string& message) {
+ throw StatusError(InvalidArgument, message);
+}
+
+bool startsWith(
+ std::span buffer, std::span prefix) {
+ if (buffer.size() < prefix.size()) {
+ return false;
+ }
+ for (size_t i = 0; i < prefix.size(); ++i) {
+ if (buffer[i] != prefix[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+} // namespace
+
+bool isEncodedImage(std::span buffer) {
+ // JPEG: FF D8 FF ...
+ constexpr std::array kJpegMagic = {0xFF, 0xD8, 0xFF};
+ // PNG: 89 50 4E 47 0D 0A 1A 0A
+ constexpr std::array kPngMagic = {
+ 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
+
+ return startsWith(buffer, kJpegMagic) || startsWith(buffer, kPngMagic);
+}
+
+/// Decodes an in-memory JPEG/PNG into packed 8-bit RGB.
+///
+/// On success writes the decoded size to `outWidth` / `outHeight` and returns
+/// width * height * 3 bytes; the out-parameters are untouched on failure.
+/// Raises StatusError(InvalidArgument) when the buffer is empty, is longer
+/// than the decoder's `int` length can express, fails to decode, or reports a
+/// dimension that is non-positive or above `kMaxImageDimension`.
+std::vector<uint8_t> decodeToRgb(
+    std::span<const uint8_t> encodedBuffer, uint32_t& outWidth,
+    uint32_t& outHeight) {
+  if (encodedBuffer.empty()) {
+    raise("Input image buffer is empty");
+  }
+  if (encodedBuffer.size() >
+      static_cast<size_t>(std::numeric_limits<int>::max())) {
+    raise("Input image buffer too large for decoder");
+  }
+  const int encodedLen = static_cast<int>(encodedBuffer.size());
+
+  // Header-only inspection so we can reject oversized images before
+  // stbi_load allocates the full RGB buffer (~300 MB for 100 MP).
+  // stbi_info returning 0 = header unparseable; defer to stbi_load
+  // below so the caller gets stbi_failure_reason() instead of a
+  // generic "header bad" from us.
+  {
+    int infoWidth = 0;
+    int infoHeight = 0;
+    int infoChannels = 0;
+    if (stbi_info_from_memory(
+            encodedBuffer.data(), encodedLen, &infoWidth, &infoHeight,
+            &infoChannels) != 0) {
+      if (infoWidth <= 0 || infoHeight <= 0) {
+        raise("Decoded image has invalid dimensions");
+      }
+      if (static_cast<uint32_t>(infoWidth) > kMaxImageDimension ||
+          static_cast<uint32_t>(infoHeight) > kMaxImageDimension) {
+        raise(
+            "Image exceeds maximum allowed dimension (" +
+            std::to_string(kMaxImageDimension) + " px per axis); header "
+            "reported " + std::to_string(infoWidth) + "x" +
+            std::to_string(infoHeight));
+      }
+    }
+  }
+
+  int width = 0;
+  int height = 0;
+  int channelsIgnored = 0;
+  // Force 3 output channels — downstream never deals with alpha/grayscale.
+  uint8_t* pixels = stbi_load_from_memory(
+      encodedBuffer.data(), encodedLen, &width, &height, &channelsIgnored,
+      static_cast<int>(kDecodedChannels));
+
+  if (pixels == nullptr) {
+    const char* reason = stbi_failure_reason();
+    std::string msg = "Failed to decode image (only JPEG and PNG are supported)";
+    if (reason != nullptr) {
+      msg += ": ";
+      msg += reason;
+    }
+    raise(msg);
+  }
+
+  // From here on `pixels` is owned by this function. Everything that can
+  // throw — the validation errors and the (potentially very large) copy —
+  // runs inside the try block so the stb buffer is released on every path.
+  std::vector<uint8_t> out;
+  try {
+    if (width <= 0 || height <= 0) {
+      raise("Decoded image has invalid dimensions");
+    }
+    if (static_cast<uint32_t>(width) > kMaxImageDimension ||
+        static_cast<uint32_t>(height) > kMaxImageDimension) {
+      raise(
+          "Image exceeds maximum allowed dimension (" +
+          std::to_string(kMaxImageDimension) + " px per axis)");
+    }
+    const size_t byteCount = static_cast<size_t>(width) *
+                             static_cast<size_t>(height) * kDecodedChannels;
+    out.assign(pixels, pixels + byteCount);
+  } catch (...) {
+    stbi_image_free(pixels);
+    throw;
+  }
+  stbi_image_free(pixels);
+
+  outWidth = static_cast<uint32_t>(width);
+  outHeight = static_cast<uint32_t>(height);
+  return out;
+}
+
+void validateRawRgb(
+ std::span rawBuffer, uint32_t width, uint32_t height,
+ uint32_t channels) {
+ if (rawBuffer.empty()) {
+ raise("Raw image buffer is empty");
+ }
+ if (channels != kChannels) {
+ raise(
+ "Raw image must have exactly 3 channels (RGB); got " +
+ std::to_string(channels));
+ }
+ if (width == 0 || height == 0) {
+ raise("Raw image width and height must be greater than zero");
+ }
+ if (width > kMaxImageDimension || height > kMaxImageDimension) {
+ raise(
+ "Raw image exceeds maximum allowed dimension (" +
+ std::to_string(kMaxImageDimension) + " px per axis)");
+ }
+ const size_t expected = static_cast(width) *
+ static_cast(height) *
+ static_cast(channels);
+ if (rawBuffer.size() != expected) {
+ raise(
+ "Raw image buffer size " + std::to_string(rawBuffer.size()) +
+ " does not match declared dimensions " + std::to_string(width) + "x" +
+ std::to_string(height) + "x" + std::to_string(channels) +
+ " (expected " + std::to_string(expected) + " bytes)");
+ }
+}
+
+/// Resizes a packed RGB image to `kInputSize` x `kInputSize` with stb's
+/// linear 8-bit resizer and returns the packed RGB result.
+///
+/// Raises StatusError(InvalidArgument) when a dimension is zero or above
+/// `kMaxImageDimension`, when `srcRgb` holds fewer bytes than
+/// srcWidth * srcHeight * kChannels, or when stb reports a failure.
+std::vector<uint8_t> resizeToInput(
+    std::span<const uint8_t> srcRgb, uint32_t srcWidth, uint32_t srcHeight) {
+  // stb reads srcWidth * srcHeight * kChannels bytes from srcRgb, so the
+  // shape must be checked here; this function is public and may be called
+  // without validateRawRgb/decodeToRgb having run first. The dimension cap
+  // also keeps the int casts and the size product below from overflowing.
+  if (srcWidth == 0 || srcHeight == 0 || srcWidth > kMaxImageDimension ||
+      srcHeight > kMaxImageDimension) {
+    raise("Source image dimensions are out of range for resize");
+  }
+  const size_t required =
+      static_cast<size_t>(srcWidth) * srcHeight * kChannels;
+  if (srcRgb.size() < required) {
+    raise("Source image buffer is smaller than its declared dimensions");
+  }
+
+  std::vector<uint8_t> out(kInputSize * kInputSize * kChannels);
+  unsigned char* ok = stbir_resize_uint8_linear(
+      srcRgb.data(), static_cast<int>(srcWidth), static_cast<int>(srcHeight),
+      static_cast<int>(srcWidth * kChannels), out.data(),
+      static_cast<int>(kInputSize), static_cast<int>(kInputSize),
+      static_cast<int>(kInputSize * kChannels), STBIR_RGB);
+  if (ok == nullptr) {
+    raise("Failed to resize image to 224x224");
+  }
+  return out;
+}
+
+/// Converts an interleaved kInputSize x kInputSize RGB byte image into a
+/// planar float tensor: each byte is scaled by 1/255, then normalised per
+/// channel as (pixel - kImageNetMean[c]) / kImageNetStd[c].
+///
+/// Raises through this file's raise() helper (StatusError, InvalidArgument)
+/// when `rgb224` does not contain exactly kInputSize * kInputSize * kChannels
+/// elements.
+std::vector normalizeToWhcn(std::span rgb224) {
+ if (rgb224.size() !=
+ static_cast(kInputSize) * kInputSize * kChannels) {
+ raise("Internal error: resized buffer does not have expected size");
+ }
+ constexpr float kUnit = 1.0F / 255.0F;
+
+ // ggml WHCN: contiguous, fastest-varying axis = width.
+ // offset(w, h, c) = c*H*W + h*W + w
+ std::vector out(static_cast(kInputSize) * kInputSize * kChannels);
+ const size_t plane = static_cast(kInputSize) * kInputSize;
+
+ for (uint32_t y = 0; y < kInputSize; ++y) {
+ for (uint32_t x = 0; x < kInputSize; ++x) {
+ // Source is interleaved (RGBRGB...); destination is one plane per channel.
+ const size_t srcIdx =
+ (static_cast(y) * kInputSize + x) * kChannels;
+ const size_t dstBase = static_cast(y) * kInputSize + x;
+ for (uint32_t c = 0; c < kChannels; ++c) {
+ const float pixel = static_cast(rgb224[srcIdx + c]) * kUnit;
+ out[c * plane + dstBase] =
+ (pixel - kImageNetMean[c]) / kImageNetStd[c];
+ }
+ }
+ }
+ return out;
+}
+
+/// Full pipeline: turns an encoded (JPEG/PNG) or raw RGB buffer into the
+/// normalised float tensor produced by normalizeToWhcn.
+///
+/// Any non-zero declared field selects the raw path, where the buffer is
+/// checked by validateRawRgb. With all three fields zero the buffer must
+/// start with JPEG or PNG magic bytes and is decoded by decodeToRgb.
+/// Raises StatusError(InvalidArgument) for an empty buffer or an
+/// unrecognised format; errors from the helpers propagate unchanged.
+std::vector<float> preprocessToTensor(
+    std::span<const uint8_t> input, uint32_t declaredWidth,
+    uint32_t declaredHeight, uint32_t declaredChannels) {
+  if (input.empty()) {
+    raise("Input image buffer is empty");
+  }
+
+  std::vector<uint8_t> decoded;  // owns pixels on the encoded path only
+  std::span<const uint8_t> rgb;
+  uint32_t width = 0;
+  uint32_t height = 0;
+
+  if (declaredWidth > 0 || declaredHeight > 0 || declaredChannels > 0) {
+    validateRawRgb(input, declaredWidth, declaredHeight, declaredChannels);
+    // Validated raw bytes are only read by the resizer, so view the caller's
+    // buffer directly instead of duplicating it (up to ~805 MB at the cap).
+    rgb = input;
+    width = declaredWidth;
+    height = declaredHeight;
+  } else {
+    if (!isEncodedImage(input)) {
+      raise(
+          "Unsupported image format: expected JPEG or PNG, or pass "
+          "'{ width, height, channels: 3 }' with raw RGB bytes");
+    }
+    decoded = decodeToRgb(input, width, height);
+    rgb = decoded;
+  }
+
+  std::vector<uint8_t> resized = resizeToInput(rgb, width, height);
+  return normalizeToWhcn(resized);
+}
+
+} // namespace classification_ggml::preprocess
diff --git a/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.hpp b/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.hpp
new file mode 100644
index 0000000000..ba76331477
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/ImagePreprocessor.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace classification_ggml::preprocess {
+
+constexpr uint32_t kInputSize = 224;
+constexpr uint32_t kChannels = 3;
+/// OOM defence — reject inputs larger than this on either axis.
+constexpr uint32_t kMaxImageDimension = 16384;
+
+/// ImageNet per-channel normalization, matching torchvision's MobileNetV3.
+constexpr std::array kImageNetMean = {0.485F, 0.456F, 0.406F};
+constexpr std::array kImageNetStd = {0.229F, 0.224F, 0.225F};
+
+/// True when the buffer starts with JPEG or PNG magic bytes. preprocessToTensor rejects a buffer without declared dimensions when this is false.
+bool isEncodedImage(std::span buffer);
+
+/// Decode JPEG/PNG to packed RGB. Throws StatusError on any failure.
+std::vector decodeToRgb(
+ std::span encodedBuffer, uint32_t& outWidth,
+ uint32_t& outHeight);
+
+/// Throws StatusError if the buffer doesn't match the declared shape,
+/// channels != 3, or dimensions exceed `kMaxImageDimension`.
+void validateRawRgb(
+ std::span rawBuffer, uint32_t width, uint32_t height,
+ uint32_t channels);
+
+/// Bilinear resize (stb_image_resize2) to `kInputSize` square.
+std::vector resizeToInput(
+ std::span srcRgb, uint32_t srcWidth, uint32_t srcHeight);
+
+/// `kInputSize` × `kInputSize` RGB → FP32 WHCN tensor, ImageNet-normalized.
+std::vector normalizeToWhcn(std::span rgb224);
+
+/// Full pipeline: encoded-or-raw buffer → FP32 WHCN tensor.
+std::vector preprocessToTensor(
+ std::span input, uint32_t declaredWidth,
+ uint32_t declaredHeight, uint32_t declaredChannels);
+
+} // namespace classification_ggml::preprocess
diff --git a/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.cpp b/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.cpp
new file mode 100644
index 0000000000..9eaf3009b0
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.cpp
@@ -0,0 +1,644 @@
+#include "MobileNetGraph.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace classification_ggml::graph {
+
+namespace {
+
+using qvac_errors::StatusError;
+using qvac_errors::general_error::InternalError;
+using qvac_errors::general_error::InvalidArgument;
+
+[[noreturn]] void raise(const std::string& msg) {
+ throw StatusError(InternalError, msg);
+}
+
+[[noreturn]] void raiseInvalid(const std::string& msg) {
+ throw StatusError(InvalidArgument, msg);
+}
+
+// FP16 tensors are storage-only; runtime-math tensors (BN scale/shift,
+// FC weights) are promoted to F32 at load time so the graph never casts.
+
+void fp16ToFp32(const void* src, float* out, size_t count) {
+ const auto* halfPtr = static_cast(src);
+ for (size_t i = 0; i < count; ++i) {
+ out[i] = ggml_fp16_to_fp32(halfPtr[i]);
+ }
+}
+
+/// Creates a new tensor in `bundleCtx` with the same type, rank and extents
+/// as the GGUF tensor `name`, and gives it that name. Only the tensor is
+/// created here; no data is copied. Raises StatusError(InternalError) when
+/// `name` is not listed in `gguf` or cannot be resolved in `ggufCtx`.
+struct ggml_tensor* cloneRaw(
+    struct ggml_context* bundleCtx, const gguf_context* gguf,
+    struct ggml_context* ggufCtx, const char* name) {
+  const int tensorIndex = gguf_find_tensor(gguf, name);
+  if (tensorIndex < 0) {
+    raise(std::string("Missing tensor in GGUF: ") + name);
+  }
+
+  struct ggml_tensor* source = ggml_get_tensor(ggufCtx, name);
+  if (source == nullptr) {
+    raise(std::string("Cannot resolve tensor from ggml ctx: ") + name);
+  }
+
+  const int rank = ggml_n_dims(source);
+  struct ggml_tensor* clone =
+      ggml_new_tensor(bundleCtx, source->type, rank, source->ne);
+  ggml_set_name(clone, name);
+  return clone;
+}
+
+/// Like cloneRaw but forces the destination dtype to F32.
+struct ggml_tensor* cloneAsFp32(
+ struct ggml_context* bundleCtx, const char* name, int n_dims,
+ const int64_t* ne) {
+ struct ggml_tensor* dst = ggml_new_tensor(bundleCtx, GGML_TYPE_F32, n_dims, ne);
+ ggml_set_name(dst, name);
+ return dst;
+}
+
+// torchvision same-padding: p = (k - 1) / 2.
+constexpr int samePadding(int kernel) {
+ return (kernel - 1) / 2;
+}
+
+/// Read a GGUF tensor (FP16 or FP32) into an FP32 vector.
+///
+/// Looks `name` up in `ggufCtx` and copies all of its elements out of
+/// `t->data`: F32 tensors via memcpy, F16 tensors via fp16ToFp32. Raises
+/// StatusError(InternalError) when the tensor is missing or has any other
+/// dtype. The `gguf` parameter is currently unused.
+/// NOTE(review): reads `t->data` directly, so `ggufCtx` must have been
+/// created with tensor data loaded — confirm at the call sites. The error
+/// texts say "BN tensor" although nothing here is BN-specific.
+std::vector loadVector1d(
+ const gguf_context* gguf, struct ggml_context* ggufCtx,
+ const std::string& name) {
+ (void)gguf;
+ struct ggml_tensor* t = ggml_get_tensor(ggufCtx, name.c_str());
+ if (t == nullptr) {
+ raise("Missing BN tensor: " + name);
+ }
+ const size_t count = ggml_nelements(t);
+ std::vector out(count);
+ if (t->type == GGML_TYPE_F32) {
+ std::memcpy(out.data(), t->data, count * sizeof(float));
+ } else if (t->type == GGML_TYPE_F16) {
+ fp16ToFp32(t->data, out.data(), count);
+ } else {
+ raise("Unsupported BN tensor dtype for: " + name);
+ }
+ return out;
+}
+
+/// Folded batch-norm as two graph nodes: a multiply by `scale` followed by an
+/// add of `shift`, i.e. `x * scale + shift`. Both operands are expected to be
+/// pre-reshaped to [1,1,C,1].
+struct ggml_tensor* applyFoldedBn(
+    struct ggml_context* ctx, struct ggml_tensor* x,
+    struct ggml_tensor* scale, struct ggml_tensor* shift) {
+  return ggml_add(ctx, ggml_mul(ctx, x, scale), shift);
+}
+
+struct GraphBuilder {
+ struct ggml_context* ctx;
+ const std::unordered_map& w;
+
+ struct ggml_tensor* t(const std::string& name) const {
+ auto it = w.find(name);
+ if (it == w.end()) {
+ raise("Missing weight tensor at graph build time: " + name);
+ }
+ return it->second;
+ }
+
+ struct ggml_tensor* activate(struct ggml_tensor* x, bool useHardswish) {
+ return useHardswish ? ggml_hardswish(ctx, x) : ggml_relu(ctx, x);
+ }
+
+ /// Conv2d + folded BN [+ optional activation].
+ struct ggml_tensor* convBnAct(
+ struct ggml_tensor* x, const std::string& convPrefix,
+ const std::string& bnPrefix, int stride, int kernel, bool activate,
+ bool useHardswish) {
+ struct ggml_tensor* kernelT = t(convPrefix + ".weight");
+ const int pad = samePadding(kernel);
+ struct ggml_tensor* conv =
+ ggml_conv_2d(ctx, kernelT, x, stride, stride, pad, pad, 1, 1);
+ struct ggml_tensor* bn =
+ applyFoldedBn(ctx, conv, t(bnPrefix + ".scale"), t(bnPrefix + ".shift"));
+ if (!activate) {
+ return bn;
+ }
+ return this->activate(bn, useHardswish);
+ }
+
+ /// Depthwise Conv2d + folded BN + activation.
+ struct ggml_tensor* dwConvBnAct(
+ struct ggml_tensor* x, const std::string& convPrefix,
+ const std::string& bnPrefix, int stride, int kernel, bool useHardswish) {
+ struct ggml_tensor* kernelT = t(convPrefix + ".weight");
+ const int pad = samePadding(kernel);
+ struct ggml_tensor* conv =
+ ggml_conv_2d_dw(ctx, kernelT, x, stride, stride, pad, pad, 1, 1);
+ struct ggml_tensor* bn =
+ applyFoldedBn(ctx, conv, t(bnPrefix + ".scale"), t(bnPrefix + ".shift"));
+ return activate(bn, useHardswish);
+ }
+
+ /// SE: avgpool → 1x1 reduce + ReLU → 1x1 expand + HardSigmoid → mul.
+ struct ggml_tensor* seBlock(
+ struct ggml_tensor* x, const std::string& sePrefix, int spatialHw) {
+ struct ggml_tensor* pooled = ggml_pool_2d(
+ ctx, x, GGML_OP_POOL_AVG, spatialHw, spatialHw, spatialHw, spatialHw,
+ 0, 0);
+
+ struct ggml_tensor* fc1 = ggml_conv_2d(
+ ctx, t(sePrefix + ".fc1.weight"), pooled, 1, 1, 0, 0, 1, 1);
+ fc1 = ggml_add(ctx, fc1, t(sePrefix + ".fc1.bias_br"));
+ fc1 = ggml_relu(ctx, fc1);
+
+ struct ggml_tensor* fc2 = ggml_conv_2d(
+ ctx, t(sePrefix + ".fc2.weight"), fc1, 1, 1, 0, 0, 1, 1);
+ fc2 = ggml_add(ctx, fc2, t(sePrefix + ".fc2.bias_br"));
+
+ struct ggml_tensor* gate = ggml_hardsigmoid(ctx, fc2);
+ return ggml_mul(ctx, x, gate);
+ }
+
+ struct ggml_tensor* invertedResidual(
+ struct ggml_tensor* x, const BlockConfig& cfg, int inputSpatialHw) {
+ const std::string base = "features." + std::to_string(cfg.featuresIndex);
+ const bool hasExpand = cfg.expandedChannels != cfg.inputChannels;
+
+ int spatial = inputSpatialHw;
+ struct ggml_tensor* y = x;
+
+ int dwBlockIdx = 0;
+ int seBlockIdx = -1;
+ int projBlockIdx = 0;
+
+ if (hasExpand) {
+ y = convBnAct(
+ y, base + ".block.0.0", base + ".block.0.1",
+ /*stride=*/1, /*kernel=*/1, /*activate=*/true, cfg.useHardswish);
+ dwBlockIdx = 1;
+ if (cfg.useSe) {
+ seBlockIdx = 2;
+ projBlockIdx = 3;
+ } else {
+ projBlockIdx = 2;
+ }
+ } else {
+ dwBlockIdx = 0;
+ if (cfg.useSe) {
+ seBlockIdx = 1;
+ projBlockIdx = 2;
+ } else {
+ projBlockIdx = 1;
+ }
+ }
+
+ const std::string dwPrefix = base + ".block." + std::to_string(dwBlockIdx);
+ y = dwConvBnAct(
+ y, dwPrefix + ".0", dwPrefix + ".1", cfg.stride, cfg.depthwiseKernel,
+ cfg.useHardswish);
+ if (cfg.stride == 2) {
+ spatial = (spatial + 1) / 2;
+ }
+
+ if (cfg.useSe) {
+ const std::string sePrefix =
+ base + ".block." + std::to_string(seBlockIdx);
+ y = seBlock(y, sePrefix, spatial);
+ }
+
+ const std::string projPrefix =
+ base + ".block." + std::to_string(projBlockIdx);
+ y = convBnAct(
+ y, projPrefix + ".0", projPrefix + ".1",
+ /*stride=*/1, /*kernel=*/1, /*activate=*/false, cfg.useHardswish);
+
+ if (cfg.stride == 1 && cfg.inputChannels == cfg.outputChannels) {
+ y = ggml_add(ctx, y, x);
+ }
+ return y;
+ }
+};
+
+} // namespace
+
+WeightsBundle::~WeightsBundle() { reset(); }
+
+WeightsBundle::WeightsBundle(WeightsBundle&& other) noexcept
+ : ctx(std::move(other.ctx)),
+ tensors(std::move(other.tensors)),
+ backendBuffer(other.backendBuffer) {
+ other.backendBuffer = nullptr;
+}
+
+WeightsBundle& WeightsBundle::operator=(WeightsBundle&& other) noexcept {
+ if (this != &other) {
+ reset();
+ ctx = std::move(other.ctx);
+ tensors = std::move(other.tensors);
+ backendBuffer = other.backendBuffer;
+ other.backendBuffer = nullptr;
+ }
+ return *this;
+}
+
+/// Returns the bundle to its empty state: clears the tensor map, releases the
+/// owning context, then frees the backend buffer if one is held. Safe to call
+/// repeatedly.
+void WeightsBundle::reset() {
+  tensors.clear();
+  ctx.reset();
+  if (backendBuffer == nullptr) {
+    return;
+  }
+  ggml_backend_buffer_free(backendBuffer);
+  backendBuffer = nullptr;
+}
+
+ComputeGraph::~ComputeGraph() { reset(); }
+
+ComputeGraph::ComputeGraph(ComputeGraph&& other) noexcept
+ : ctx(std::move(other.ctx)),
+ graph(other.graph),
+ input(other.input),
+ output(other.output),
+ backendBuffer(other.backendBuffer) {
+ other.graph = nullptr;
+ other.input = nullptr;
+ other.output = nullptr;
+ other.backendBuffer = nullptr;
+}
+
+ComputeGraph& ComputeGraph::operator=(ComputeGraph&& other) noexcept {
+ if (this != &other) {
+ reset();
+ ctx = std::move(other.ctx);
+ graph = other.graph;
+ input = other.input;
+ output = other.output;
+ backendBuffer = other.backendBuffer;
+ other.graph = nullptr;
+ other.input = nullptr;
+ other.output = nullptr;
+ other.backendBuffer = nullptr;
+ }
+ return *this;
+}
+
+/// Returns the graph to its empty state: nulls the graph/input/output
+/// pointers, releases the owning context, then frees the backend buffer if
+/// one is held. Safe to call repeatedly.
+void ComputeGraph::reset() {
+  output = nullptr;
+  input = nullptr;
+  graph = nullptr;
+  ctx.reset();
+  if (backendBuffer == nullptr) {
+    return;
+  }
+  ggml_backend_buffer_free(backendBuffer);
+  backendBuffer = nullptr;
+}
+
+WeightsBundle loadWeights(
+ const std::string& ggufPath, ggml_backend_t backend,
+ std::vector& outLabels) {
+ outLabels.clear();
+ struct ggml_context* ggufCtx = nullptr;
+ gguf_init_params params{/*no_alloc=*/false, &ggufCtx};
+ gguf_context* gguf = gguf_init_from_file(ggufPath.c_str(), params);
+ if (gguf == nullptr) {
+ raiseInvalid("Failed to open GGUF file: " + ggufPath);
+ }
+ std::unique_ptr ggufGuard(gguf, gguf_free);
+ std::unique_ptr ggufCtxGuard(
+ ggufCtx, ggml_free);
+
+ // Default to the architecture-standard 0.001 (PyTorch's BN default).
+ // Never silently fall back to torchvision's 1e-5 reference value.
+ float bnEps = kBatchNormEpsilon;
+ {
+ const int64_t epsIdx = gguf_find_key(gguf, "mobilenet.bn_eps");
+ if (epsIdx >= 0) {
+ bnEps = gguf_get_val_f32(gguf, static_cast(epsIdx));
+ }
+ }
+
+ {
+ uint32_t numClasses = kNumClasses;
+ const int64_t idxN = gguf_find_key(gguf, "mobilenet.num_classes");
+ if (idxN >= 0) {
+ numClasses = gguf_get_val_u32(gguf, static_cast(idxN));
+ }
+ // Mismatch silently corrupts the classifier upload and the per-call
+ // tensor_get; reject up front.
+ if (numClasses != kNumClasses) {
+ raiseInvalid(
+ "GGUF metadata 'mobilenet.num_classes' (" +
+ std::to_string(numClasses) +
+ ") does not match the addon's compiled-in class count (" +
+ std::to_string(kNumClasses) +
+ "); rebuild @qvac/classification-ggml against this model or use "
+ "a GGUF with the expected number of classes");
+ }
+ for (uint32_t i = 0; i < numClasses; ++i) {
+ const std::string key = "mobilenet.class_" + std::to_string(i);
+ const int64_t idx = gguf_find_key(gguf, key.c_str());
+ if (idx < 0) {
+ outLabels.clear();
+ break;
+ }
+ outLabels.emplace_back(gguf_get_val_str(gguf, static_cast(idx)));
+ }
+ }
+
+ WeightsBundle bundle;
+ const size_t ctxSize = ggml_tensor_overhead() * 4096;
+ bundle.ctx = std::unique_ptr(
+ ggml_init({ctxSize, nullptr, /*no_alloc=*/true}), ggml_free);
+ if (!bundle.ctx) {
+ raise("Failed to allocate weights ggml context");
+ }
+
+ auto& tensors = bundle.tensors;
+
+ auto registerTensor = [&](struct ggml_tensor* dst) {
+ tensors.emplace(ggml_get_name(dst), dst);
+ };
+
+ auto addConvWeight = [&](const std::string& name) {
+ struct ggml_tensor* t = cloneRaw(bundle.ctx.get(), gguf, ggufCtx, name.c_str());
+ registerTensor(t);
+ };
+
+ // SE bias is registered twice: 1D raw (used by unit tests) and an F32
+ // [1,1,C,1] broadcast view (consumed by the graph against 4D feature maps).
+ auto addSeBiasBroadcast = [&](const std::string& name, int channels) {
+ struct ggml_tensor* raw =
+ cloneRaw(bundle.ctx.get(), gguf, ggufCtx, name.c_str());
+ registerTensor(raw);
+
+ const int64_t shape4d[4] = {1, 1, channels, 1};
+ const std::string brName = name + "_br";
+ struct ggml_tensor* br = cloneAsFp32(bundle.ctx.get(), brName.c_str(), 4, shape4d);
+ tensors.emplace(brName, br);
+ };
+
+ // Fold BN at load time: replaces ~34 per-inference sqrt + 4-op chains.
+ auto addFoldedBn = [&](const std::string& bnPrefix, int channels) {
+ const int64_t shape4d[4] = {1, 1, channels, 1};
+ struct ggml_tensor* scale =
+ cloneAsFp32(bundle.ctx.get(), (bnPrefix + ".scale").c_str(), 4, shape4d);
+ struct ggml_tensor* shift =
+ cloneAsFp32(bundle.ctx.get(), (bnPrefix + ".shift").c_str(), 4, shape4d);
+ tensors.emplace(bnPrefix + ".scale", scale);
+ tensors.emplace(bnPrefix + ".shift", shift);
+ };
+
+ auto addFcWeightFp32 = [&](const std::string& name, int in, int out) {
+ const int64_t shape[2] = {in, out};
+ struct ggml_tensor* t = cloneAsFp32(bundle.ctx.get(), name.c_str(), 2, shape);
+ tensors.emplace(name, t);
+ };
+ auto addFcBiasFp32 = [&](const std::string& name, int out) {
+ const int64_t shape[1] = {out};
+ struct ggml_tensor* t = cloneAsFp32(bundle.ctx.get(), name.c_str(), 1, shape);
+ tensors.emplace(name, t);
+ };
+
+ addConvWeight("features.0.0.weight");
+ addFoldedBn("features.0.1", kStemOutChannels);
+
+ for (const BlockConfig& cfg : kBlocks) {
+ const std::string base = "features." + std::to_string(cfg.featuresIndex);
+ const bool hasExpand = cfg.expandedChannels != cfg.inputChannels;
+ int dwIdx = 0;
+ int seIdx = -1;
+ int projIdx = 0;
+ if (hasExpand) {
+ addConvWeight(base + ".block.0.0.weight");
+ addFoldedBn(base + ".block.0.1", cfg.expandedChannels);
+ dwIdx = 1;
+ if (cfg.useSe) {
+ seIdx = 2;
+ projIdx = 3;
+ } else {
+ projIdx = 2;
+ }
+ } else {
+ if (cfg.useSe) {
+ seIdx = 1;
+ projIdx = 2;
+ } else {
+ projIdx = 1;
+ }
+ }
+ const std::string dwBase = base + ".block." + std::to_string(dwIdx);
+ addConvWeight(dwBase + ".0.weight");
+ addFoldedBn(dwBase + ".1", cfg.expandedChannels);
+
+ if (cfg.useSe) {
+ const std::string seBase = base + ".block." + std::to_string(seIdx);
+ addConvWeight(seBase + ".fc1.weight");
+ addSeBiasBroadcast(seBase + ".fc1.bias", cfg.seReducedChannels);
+ addConvWeight(seBase + ".fc2.weight");
+ addSeBiasBroadcast(seBase + ".fc2.bias", cfg.expandedChannels);
+ }
+
+ const std::string projBase = base + ".block." + std::to_string(projIdx);
+ addConvWeight(projBase + ".0.weight");
+ addFoldedBn(projBase + ".1", cfg.outputChannels);
+ }
+
+ addConvWeight("features.12.0.weight");
+ addFoldedBn("features.12.1", kTailOutChannels);
+
+ addFcWeightFp32("classifier.0.weight", kTailOutChannels, kClassifierHidden);
+ addFcBiasFp32("classifier.0.bias", kClassifierHidden);
+ addFcWeightFp32("classifier.3.weight", kClassifierHidden, kNumClasses);
+ addFcBiasFp32("classifier.3.bias", kNumClasses);
+
+ bundle.backendBuffer =
+ ggml_backend_alloc_ctx_tensors(bundle.ctx.get(), backend);
+ if (bundle.backendBuffer == nullptr) {
+ raise("Failed to allocate backend buffer for weights");
+ }
+
+ // First pass: raw byte copies for storage-only tensors. Folded/promoted
+ // tensors are filled by foldBn / foldSeBias / uploadClassifierTensor below.
+ for (auto& [name, dst] : tensors) {
+ if (name.ends_with(".scale") || name.ends_with(".shift") ||
+ name.ends_with(".bias_br") || name == "classifier.0.weight" ||
+ name == "classifier.0.bias" || name == "classifier.3.weight" ||
+ name == "classifier.3.bias") {
+ continue;
+ }
+ struct ggml_tensor* src = ggml_get_tensor(ggufCtx, name.c_str());
+ if (src == nullptr) {
+ raise("Source tensor missing from GGUF: " + name);
+ }
+ if (src->type != dst->type) {
+ raise("Dtype mismatch while copying tensor: " + name);
+ }
+ ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
+ }
+
+ auto uploadF32 = [&](struct ggml_tensor* dst, const std::vector& buf) {
+ if (static_cast(ggml_nelements(dst)) != buf.size()) {
+ raise(
+ std::string("Element count mismatch for ") + ggml_get_name(dst) +
+ ": expected " + std::to_string(ggml_nelements(dst)) + ", got " +
+ std::to_string(buf.size()));
+ }
+ ggml_backend_tensor_set(dst, buf.data(), 0, buf.size() * sizeof(float));
+ };
+
+ auto foldBn = [&](const std::string& bnPrefix) {
+ std::vector w =
+ loadVector1d(gguf, ggufCtx, bnPrefix + ".weight");
+ std::vector b =
+ loadVector1d(gguf, ggufCtx, bnPrefix + ".bias");
+ std::vector m =
+ loadVector1d(gguf, ggufCtx, bnPrefix + ".running_mean");
+ std::vector v =
+ loadVector1d(gguf, ggufCtx, bnPrefix + ".running_var");
+ const size_t n = w.size();
+ if (b.size() != n || m.size() != n || v.size() != n) {
+ raise("BN param size mismatch for " + bnPrefix);
+ }
+ std::vector scale(n);
+ std::vector shift(n);
+ for (size_t i = 0; i < n; ++i) {
+ const float invStd = 1.0F / std::sqrt(v[i] + bnEps);
+ scale[i] = w[i] * invStd;
+ shift[i] = b[i] - m[i] * scale[i];
+ }
+ uploadF32(tensors.at(bnPrefix + ".scale"), scale);
+ uploadF32(tensors.at(bnPrefix + ".shift"), shift);
+ };
+
+ auto foldSeBias = [&](const std::string& biasName) {
+ std::vector b = loadVector1d(gguf, ggufCtx, biasName);
+ uploadF32(tensors.at(biasName + "_br"), b);
+ };
+
+ foldBn("features.0.1");
+ for (const BlockConfig& cfg : kBlocks) {
+ const std::string base = "features." + std::to_string(cfg.featuresIndex);
+ const bool hasExpand = cfg.expandedChannels != cfg.inputChannels;
+ int dwIdx = 0;
+ int seIdx = -1;
+ int projIdx = 0;
+ if (hasExpand) {
+ foldBn(base + ".block.0.1");
+ dwIdx = 1;
+ if (cfg.useSe) {
+ seIdx = 2;
+ projIdx = 3;
+ } else {
+ projIdx = 2;
+ }
+ } else {
+ if (cfg.useSe) {
+ seIdx = 1;
+ projIdx = 2;
+ } else {
+ projIdx = 1;
+ }
+ }
+ foldBn(base + ".block." + std::to_string(dwIdx) + ".1");
+ if (cfg.useSe) {
+ const std::string seBase = base + ".block." + std::to_string(seIdx);
+ foldSeBias(seBase + ".fc1.bias");
+ foldSeBias(seBase + ".fc2.bias");
+ }
+ foldBn(base + ".block." + std::to_string(projIdx) + ".1");
+ }
+ foldBn("features.12.1");
+
+ auto uploadClassifierTensor = [&](const std::string& name) {
+ std::vector buf = loadVector1d(gguf, ggufCtx, name);
+ uploadF32(tensors.at(name), buf);
+ };
+ uploadClassifierTensor("classifier.0.weight");
+ uploadClassifierTensor("classifier.0.bias");
+ uploadClassifierTensor("classifier.3.weight");
+ uploadClassifierTensor("classifier.3.bias");
+
+ return bundle;
+}
+
+/// Builds the static MobileNetV3-Small forward graph on top of `weights` and
+/// allocates every graph tensor on `backend`.
+///
+/// Raises when the ggml context cannot be created, when the logits tensor does
+/// not contain exactly kNumClasses elements, or when the backend buffer
+/// allocation fails.
+ComputeGraph buildGraph(const WeightsBundle& weights, ggml_backend_t backend) {
+  // Capacity passed to ggml_new_graph_custom() below. The context has to
+  // reserve bookkeeping for a graph of this same capacity, so the size is
+  // computed with ggml_graph_overhead_custom(); plain ggml_graph_overhead()
+  // only covers a default-sized graph.
+  constexpr size_t kGraphCapacity = 8192;
+  // Upper bound on tensor headers created while building the graph.
+  constexpr size_t kMaxGraphTensors = 4096;
+
+  ComputeGraph cg;
+  const size_t ctxSize =
+      ggml_tensor_overhead() * kMaxGraphTensors +
+      ggml_graph_overhead_custom(kGraphCapacity, /*grads=*/false);
+  cg.ctx = std::unique_ptr(
+      ggml_init({ctxSize, nullptr, /*no_alloc=*/true}), ggml_free);
+  if (!cg.ctx) {
+    raise("Failed to allocate graph ggml context");
+  }
+  struct ggml_context* ctx = cg.ctx.get();
+
+  // WHCN: width, height, channels, batch.
+  cg.input =
+      ggml_new_tensor_4d(ctx, GGML_TYPE_F32, kInputHw, kInputHw, 3, 1);
+  ggml_set_name(cg.input, "input");
+
+  GraphBuilder gb{ctx, weights.tensors};
+
+  // Stem: stride-2 3x3 conv + BN + HardSwish.
+  struct ggml_tensor* x = gb.convBnAct(
+      cg.input, "features.0.0", "features.0.1", /*stride=*/2, /*kernel=*/3,
+      /*activate=*/true, /*useHardswish=*/true);
+
+  // Spatial extent after the stem; every stride-2 block halves it again
+  // (rounding up).
+  int spatial = kInputHw / 2;
+
+  for (const BlockConfig& cfg : kBlocks) {
+    x = gb.invertedResidual(x, cfg, spatial);
+    if (cfg.stride == 2) {
+      spatial = (spatial + 1) / 2;
+    }
+  }
+
+  // Tail: 1x1 conv + BN + HardSwish.
+  x = gb.convBnAct(
+      x, "features.12.0", "features.12.1", /*stride=*/1, /*kernel=*/1,
+      /*activate=*/true, /*useHardswish=*/true);
+
+  // Global average pool: kernel and stride both equal the remaining extent.
+  struct ggml_tensor* pooled = ggml_pool_2d(
+      ctx, x, GGML_OP_POOL_AVG, spatial, spatial, spatial, spatial, 0, 0);
+  struct ggml_tensor* flat = ggml_reshape_1d(ctx, pooled, kTailOutChannels);
+
+  struct ggml_tensor* fc0 = ggml_mul_mat(
+      ctx, gb.t("classifier.0.weight"), flat);
+  fc0 = ggml_add(ctx, fc0, gb.t("classifier.0.bias"));
+  fc0 = ggml_hardswish(ctx, fc0);
+
+  struct ggml_tensor* fc3 = ggml_mul_mat(
+      ctx, gb.t("classifier.3.weight"), fc0);
+  fc3 = ggml_add(ctx, fc3, gb.t("classifier.3.bias"));
+
+  cg.output = fc3;
+  ggml_set_name(cg.output, "logits");
+
+  // The warmup and process() paths both read sizeof(float)*kNumClasses
+  // bytes from cg.output; mismatch silently truncates or reads OOB.
+  if (ggml_nelements(cg.output) != static_cast(kNumClasses)) {
+    raise(
+        "Compute graph output has " +
+        std::to_string(ggml_nelements(cg.output)) +
+        " elements, expected " + std::to_string(kNumClasses) +
+        "; classifier wiring or GGUF weight shapes are inconsistent with "
+        "graph::kNumClasses");
+  }
+
+  cg.graph = ggml_new_graph_custom(ctx, kGraphCapacity, /*grads=*/false);
+  ggml_build_forward_expand(cg.graph, cg.output);
+
+  cg.backendBuffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
+  if (cg.backendBuffer == nullptr) {
+    raise("Failed to allocate backend buffer for compute graph");
+  }
+
+  return cg;
+}
+
+} // namespace classification_ggml::graph
diff --git a/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.hpp b/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.hpp
new file mode 100644
index 0000000000..3982354fec
--- /dev/null
+++ b/packages/classification-ggml/addon/src/model-interface/MobileNetGraph.hpp
@@ -0,0 +1,99 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace classification_ggml::graph {
+
+/// One torchvision MobileNetV3-Small `InvertedResidual` block.
+///
+/// Plain aggregate: `kBlocks` initialises it positionally, so the member
+/// order below is part of the table's contract.
+struct BlockConfig {
+ int featuresIndex; // 1..11, matches `features.N` in the GGUF
+ int inputChannels; // channels entering the block
+ int expandedChannels; // depthwise width; an expand conv exists only when != inputChannels
+ int outputChannels; // channels after the projection conv
+ int depthwiseKernel; // 3 or 5
+ int stride; // 1 or 2
+ bool useHardswish; // false = ReLU
+ bool useSe; // block carries squeeze-excite fc1/fc2 tensors
+ int seReducedChannels; // channel count of the SE fc1 bias; 0 when useSe is false
+};
+
+// Number of InvertedResidual blocks (`features.1` .. `features.11`).
+inline constexpr int kNumBlocks = 11;
+// Per-block configuration in forward order. Columns follow BlockConfig's
+// member order; loadWeights() and buildGraph() both iterate this table, so
+// the tensor names they derive from `featuresIndex` stay in sync.
+inline constexpr std::array kBlocks = {{
+ // idx inC expC outC k s hs se seR
+ {1, 16, 16, 16, 3, 2, false, true, 8},
+ {2, 16, 72, 24, 3, 2, false, false, 0},
+ {3, 24, 88, 24, 3, 1, false, false, 0},
+ {4, 24, 96, 40, 5, 2, true, true, 24},
+ {5, 40, 240, 40, 5, 1, true, true, 64},
+ {6, 40, 240, 40, 5, 1, true, true, 64},
+ {7, 40, 120, 48, 5, 1, true, true, 32},
+ {8, 48, 144, 48, 5, 1, true, true, 40},
+ {9, 48, 288, 96, 5, 2, true, true, 72},
+ {10, 96, 576, 96, 5, 1, true, true, 144},
+ {11, 96, 576, 96, 5, 1, true, true, 144},
+}};
+
+// Output channels of the stem conv (`features.0`).
+inline constexpr int kStemOutChannels = 16;
+// Output channels of the tail conv (`features.12`); also the classifier input width.
+inline constexpr int kTailOutChannels = 576;
+// Width of the hidden classifier layer (`classifier.0`).
+inline constexpr int kClassifierHidden = 1024;
+// Compiled-in class count; loadWeights() rejects a GGUF whose
+// `mobilenet.num_classes` differs and buildGraph() checks the logits size against it.
+inline constexpr int kNumClasses = 3;
+// BatchNorm epsilon used when the GGUF has no `mobilenet.bn_eps` key.
+inline constexpr float kBatchNormEpsilon = 0.001F;
+// Width and height of the square input tensor.
+inline constexpr int kInputHw = 224;
+
+/// ggml context + name→tensor map for every weight, plus the backing
+/// backend buffer. Lives for the entire model lifetime.
+///
+/// Move-only: copy construction/assignment are deleted. The move operations,
+/// destructor and reset() are declared here and defined out of line.
+struct WeightsBundle {
+ // Context holding the weight tensor headers; loadWeights() creates it with
+ // no_alloc=true so tensor data lives in `backendBuffer`, not in the context.
+ std::unique_ptr ctx{
+ nullptr, ggml_free};
+ // Weight lookup by tensor name (GGUF names plus the derived `.scale`,
+ // `.shift` and `_br` entries registered by loadWeights()).
+ std::unordered_map tensors;
+ // Result of ggml_backend_alloc_ctx_tensors() for `ctx`; nullptr until loaded.
+ ggml_backend_buffer_t backendBuffer = nullptr;
+
+ WeightsBundle() = default;
+ WeightsBundle(const WeightsBundle&) = delete;
+ WeightsBundle& operator=(const WeightsBundle&) = delete;
+ WeightsBundle(WeightsBundle&& other) noexcept;
+ WeightsBundle& operator=(WeightsBundle&& other) noexcept;
+ ~WeightsBundle();
+
+ // NOTE(review): presumably releases backendBuffer and the context and
+ // clears `tensors` — implementation is not visible here, confirm.
+ void reset();
+};
+
+/// Compute graph + its ggml context. Input/output tensors are reused
+/// across classify() calls; only input pixel data is rewritten per call.
+///
+/// Move-only: copy construction/assignment are deleted. The move operations,
+/// destructor and reset() are declared here and defined out of line.
+struct ComputeGraph {
+ // Context owning the graph and its tensor headers (created with no_alloc=true).
+ std::unique_ptr ctx{
+ nullptr, ggml_free};
+ // Forward graph built by buildGraph(); allocated inside `ctx`.
+ struct ggml_cgraph* graph = nullptr;
+ // F32 input tensor named "input", laid out WHCN as [kInputHw, kInputHw, 3, 1].
+ struct ggml_tensor* input = nullptr;
+ // Logits tensor named "logits"; buildGraph() verifies it has kNumClasses elements.
+ struct ggml_tensor* output = nullptr;
+ // Result of ggml_backend_alloc_ctx_tensors() for `ctx`; nullptr until built.
+ ggml_backend_buffer_t backendBuffer = nullptr;
+
+ ComputeGraph() = default;
+ ComputeGraph(const ComputeGraph&) = delete;
+ ComputeGraph& operator=(const ComputeGraph&) = delete;
+ ComputeGraph(ComputeGraph&& other) noexcept;
+ ComputeGraph& operator=(ComputeGraph&& other) noexcept;
+ ~ComputeGraph();
+
+ // NOTE(review): presumably releases backendBuffer and the context and nulls
+ // the raw pointers — implementation is not visible here, confirm.
+ void reset();
+};
+
+/// Loads every tensor + the `mobilenet.class_N` labels from a GGUF file.
+/// `outLabels` receives one label per class; it is cleared (left empty) as
+/// soon as any expected `mobilenet.class_N` key is missing.
+///
+/// Fails with an invalid-argument error when the file cannot be opened or when
+/// `mobilenet.num_classes` is present and differs from kNumClasses. BatchNorm
+/// parameters are folded into per-channel scale/shift tensors using
+/// `mobilenet.bn_eps` (kBatchNormEpsilon when the key is absent).
+WeightsBundle loadWeights(
+ const std::string& ggufPath, ggml_backend_t backend,
+ std::vector& outLabels);
+
+/// Build the MobileNetV3-Small forward graph. Caller writes pixels into
+/// `graph.input` via `ggml_backend_tensor_set` before each compute.
+///
+/// Raises if the graph context or backend buffer cannot be allocated, or if
+/// the logits tensor does not hold exactly kNumClasses elements.
+ComputeGraph buildGraph(const WeightsBundle& weights, ggml_backend_t backend);
+
+} // namespace classification_ggml::graph
diff --git a/packages/classification-ggml/binding.js b/packages/classification-ggml/binding.js
new file mode 100644
index 0000000000..cea46308c0
--- /dev/null
+++ b/packages/classification-ggml/binding.js
@@ -0,0 +1 @@
+// Re-export whatever `require.addon()` resolves for this package.
+// NOTE(review): presumably the native Bare addon built from binding.cpp — confirm.
+module.exports = require.addon()
diff --git a/packages/classification-ggml/docs/architecture.md b/packages/classification-ggml/docs/architecture.md
new file mode 100644
index 0000000000..0c60092ea5
--- /dev/null
+++ b/packages/classification-ggml/docs/architecture.md
@@ -0,0 +1,190 @@
+# Architecture — `@qvac/classification-ggml`
+
+This document describes the architecture of the MobileNetV3-Small 3-class
+image classification addon, the GGML compute graph it constructs, and the
+rationale for the key implementation choices.
+
+## Component layout
+
+```
++----------------------------------------------+
+| JS: ImageClassifier (index.js) |
+| - lifecycle (load / classify / unload) |
+| all serialised via exclusiveRunQueue |
+| - createJobHandler + QvacResponse plumbing |
+| - thin pass-through to native validation |
++----------------------------------------------+
+| JS: ClassificationInterface (addon.js) |
+| - thin native bridge: createInstance, |
+| activate, runJob, cancel, unload |
+| - exports mapAddonEvent for index.js |
+| (shape-keyed Output / JobEnded routing) |
++----------------------------------------------+
+| Native: BARE_MODULE (binding.cpp) |
+| - exports createInstance/runJob/activateโฆ |
++----------------------------------------------+
+| Native: AddonJs (addon/AddonJs.hpp) |
+| - js <-> C++ bridge |
+| - single source of truth for argument |
+| validation (type / range / shape) |
+| - packs ClassifyInput (vector + |
+| optional + topK) |
+| - JsClassifyOutputHandler โ JS array |
++----------------------------------------------+
+| Native: AddonCpp (from @qvac/โฆ-addon-cpp) |
+| - JobRunner (dedicated worker thread) |
+| - OutputQueue + OutputCallback plumbing |
++----------------------------------------------+
+| Native: ClassificationModel (IModel) |
+| - load(): backend init + weights + graph |
+| + full-pipeline warmup pass |
+| desktop/iOS: ggml_backend_cpu_init() |
+| android : load_all_from_path() + |
+| dev_by_type(CPU) + dev_init |
+| - process(): preprocess โ compute โ softmax|
++----------------------------------------------+
+| Native: MobileNetGraph |
+| - loadWeights(): GGUF โ folded BN + FC F32 |
+| (validates mobilenet.num_classes) |
+| - buildGraph(): static forward compute |
+| graph wired to pre-allocated buffers |
+| (asserts ggml_nelements(output) == |
+| kNumClasses before allocation) |
++----------------------------------------------+
+| libggml (CPU backend only, via qvac-fabric) |
+| - desktop/iOS: CPU statically linked into |
+| the .bare |
+| - android: per-microarch CPU MODULE .so |
+| ships next to the .bare under |
+| prebuilds/android-arm64/qvac__โฆ/ |
++----------------------------------------------+
+```
+
+## MobileNetV3-Small layer list
+
+The graph matches `torchvision.models.mobilenet_v3_small` with the bundled
+3-class classifier head. Spatial dimensions start at `224×224` and halve
+at each stride-2 layer.
+
+| Stage          | Op                                                 | In   | Out  | Spatial |
+|----------------|----------------------------------------------------|------|------|---------|
+| `features.0`   | Conv2dBN + HardSwish (3×3, s=2)                    | 3    | 16   | 112     |
+| `features.1`   | InvertedResidual (DW 3×3 s=2, SE, ReLU)            | 16   | 16   | 56      |
+| `features.2`   | InvertedResidual (exp→72, DW 3×3 s=2, ReLU)        | 16   | 24   | 28      |
+| `features.3`   | InvertedResidual (exp→88, DW 3×3 s=1, ReLU, +)     | 24   | 24   | 28      |
+| `features.4`   | InvertedResidual (exp→96, DW 5×5 s=2, SE, HS)      | 24   | 40   | 14      |
+| `features.5`   | InvertedResidual (exp→240, DW 5×5 s=1, SE, HS, +)  | 40   | 40   | 14      |
+| `features.6`   | InvertedResidual (exp→240, DW 5×5 s=1, SE, HS, +)  | 40   | 40   | 14      |
+| `features.7`   | InvertedResidual (exp→120, DW 5×5 s=1, SE, HS)     | 40   | 48   | 14      |
+| `features.8`   | InvertedResidual (exp→144, DW 5×5 s=1, SE, HS, +)  | 48   | 48   | 14      |
+| `features.9`   | InvertedResidual (exp→288, DW 5×5 s=2, SE, HS)     | 48   | 96   | 7       |
+| `features.10`  | InvertedResidual (exp→576, DW 5×5 s=1, SE, HS, +)  | 96   | 96   | 7       |
+| `features.11`  | InvertedResidual (exp→576, DW 5×5 s=1, SE, HS, +)  | 96   | 96   | 7       |
+| `features.12`  | Conv2dBN + HardSwish (1×1)                         | 96   | 576  | 7       |
+| avg-pool       | GlobalAveragePool                                  | 576  | 576  | 1       |
+| `classifier.0` | Linear + HardSwish                                 | 576  | 1024 | 1       |
+| `classifier.3` | Linear                                             | 1024 | 3    | 1       |
+
+Totals: **34 conv layers** (1 stem + 11 blocks × {1 or 2 pointwise 1×1 +
+1 depthwise} + 1 tail; the SE `fc1`/`fc2` convs are not counted) and
+**2 linear layers** in the classifier. `+` marks the residual add
+(applied when `stride == 1` and `inputChannels == outputChannels`).
+
+## GGML graph construction
+
+### Weight loading
+
+`MobileNetGraph::loadWeights()` opens the GGUF file via
+`gguf_init_from_file()` and clones every required tensor into a freshly
+allocated `ggml_context` that is backed by a CPU backend buffer (allocated
+with `ggml_backend_alloc_ctx_tensors`).
+
+Weights are transformed at load time into two layouts:
+
+1. **Raw FP16** (`cloneRaw`) for conv kernels and SE FC kernels — the
+   native `ggml_conv_2d` / `ggml_conv_2d_dw` paths accept F16 kernels
+   against an F32 input on the CPU backend.
+2. **Folded FP32 BN scale/shift** (`cloneAsFp32` + second pass) for every
+   BatchNorm layer. At load time we compute:
+
+ ```
+ scale_c = weight_c / sqrt(running_var_c + 0.001)
+ shift_c = bias_c - running_mean_c * scale_c
+ ```
+
+ and store `scale[1,1,C,1]` and `shift[1,1,C,1]` tensors. The forward
+ graph then applies BN as a single `ggml_mul` + `ggml_add` broadcast.
+
+This fold avoids 34 × 4 ops (`sub`, `div`, `mul`, `add`) per inference and
+sidesteps the classic `eps = 1e-5` mistake by computing the division
+exactly once against the GGUF-supplied `mobilenet.bn_eps` (defaulting to
+`0.001` when the key is absent).
+
+Classifier FC weights and biases are promoted to FP32 on load for
+numerical stability of the tiny 3-element logits tail.
+
+### Forward graph
+
+`MobileNetGraph::buildGraph()` builds a static graph in a second
+`ggml_context` with `no_alloc = true`. The graph is allocated on the
+backend once, wiring up:
+
+- `input` tensor `[W=224, H=224, C=3, N=1] F32`
+- Stem conv + BN + HardSwish
+- 11 `InvertedResidual` blocks (`GraphBuilder::invertedResidual`)
+- Tail conv + BN + HardSwish
+- Global average pool (`ggml_pool_2d` with kernel == spatial extent)
+- Reshape to 1-D (576)
+- `classifier.0.weight` linear + bias + HardSwish
+- `classifier.3.weight` linear + bias → logits
+
+The graph is captured via `ggml_new_graph_custom` + `ggml_build_forward_expand`.
+
+### Per-inference path
+
+`ClassificationModel::process()`:
+
+1. Preprocess the image buffer to a 224×224×3 FP32 WHCN tensor.
+2. `ggml_backend_tensor_set(input, fp32Buffer)` — copies pixels only.
+3. `ggml_backend_graph_compute(backend, graph)`.
+4. `ggml_backend_tensor_get(output, logits)`.
+5. Numerically stable softmax over 3 logits in C++.
+6. Build sorted `ClassifyResult` list, apply `topK`, return.
+
+Nothing allocates tensors in the hot path; the only per-call work is the
+pixel copy, the compute itself, the 3-element softmax, and label lookup.
+
+## Threading model
+
+- Each `ClassificationModel` instance owns its own `JobRunner` worker
+  thread (inherited from `qvac-lib-inference-addon-cpp`), so concurrent
+  `classify()` calls are serialized per instance but independent across
+  instances — supporting acceptance criterion N6.
+- The JS-side `exclusiveRunQueue()` (mirroring `LlmLlamacpp`) further
+ serialises `load`, `classify`, and `unload` per `ImageClassifier`
+ instance, so a `unload()` racing an in-flight `classify()` queues
+ cleanly behind it (and explicitly cancels then fails the in-flight
+ request with `Model was unloaded`).
+- Per-inference mutex (`ClassificationModel::mutex_`) guards against a
+ torn state if a future user bypasses `JobRunner`.
+- The CPU compute thread count is left at libggml's default
+ (`std::thread::hardware_concurrency`) on every platform; the addon
+ does not expose a `threads` knob. Rationale: on Apple/Linux/Windows
+ desktop the symbol `ggml_backend_cpu_set_n_threads` is statically
+ linkable, but on Android the CPU backend is loaded as a per-microarch
+ MODULE `.so` (`GGML_CPU_ALL_VARIANTS=ON`) where the setter is not
+ resolvable from the addon's `.bare`, so a `threads` config value
+ could only ever apply on a subset of platforms. We picked
+ consistency and dropped the knob altogether.
+
+## Memory footprint
+
+- Weights on the CPU backend: ≈ `2.94 MB` + ≈ `60 KB` of folded BN scale/
+  shift + FP32 classifier FC (≈ `2.5 MB`) ≈ **5.5 MB total** in memory.
+- Compute buffer (intermediate activations): single-digit MB for a
+  224×224 input — allocated once at `load()` time.
+- No heap allocation inside the hot path.
+
+## References
+
+- Howard et al., *Searching for MobileNetV3*, arXiv:1905.02244, 2019.
+- `torchvision.models.mobilenet_v3_small` — reference architecture.
+- GGML public API: `ggml.h`, `ggml-backend.h`, `ggml-alloc.h`, `gguf.h`.
diff --git a/packages/classification-ggml/docs/data-flow.md b/packages/classification-ggml/docs/data-flow.md
new file mode 100644
index 0000000000..310561f9c0
--- /dev/null
+++ b/packages/classification-ggml/docs/data-flow.md
@@ -0,0 +1,191 @@
+# Data flow — `@qvac/classification-ggml`
+
+End-to-end trace of a single `classifier.classify(buffer)` call.
+
+```
++-------------------------+
+| Caller |
+| classifier.classify( |
+| imageBuffer, opts) |
++-----------+-------------+
+ | JPEG/PNG/raw RGB bytes + {topK?, width?, height?, channels?}
+ v
++-------------------------+
+| ImageClassifier (JS) |
+| - lifecycle gates |
+| (load / classify / |
+| unload all serialised|
+| via exclusiveRunQueue)|
+| - thin pass-through: |
+| builds native job |
+| { type: 'image', |
+| content: buf, |
+| width?, height?, |
+| channels?, topK? } |
++-----------+-------------+
+ |
+ v
++-------------------------+
+| ClassificationInterface|
+| (addon.js) |
+| - createInstance once |
+| - binding.runJob(...) |
+| - native events fan |
+| out via mapAddonEvent|
++-----------+-------------+
+ |
+ v
++-------------------------+
+| Native binding.cpp |
+| - resolves addon handle|
+| - calls js::runJob |
++-----------+-------------+
+ |
+ v
++-------------------------+
+| AddonJs::runJob (C++) |
+| Single source of truth |
+| for argument validation|
+| - type === 'image' |
+| - content is TypedArray|
+| - width/height/channels|
+| all-or-nothing trio |
+| - topK > 0 if provided |
+| - bare-runtime int32 |
+| range checks |
+| Throws StatusError |
+| (InvalidArgument) on |
+| any violation. |
+| - packs ClassifyInput |
+| (vector + |
+| optional |
+| + topK) |
+| - AddonCpp.runJob(any) |
++-----------+-------------+
+ |
+ | ClassifyInput
+ v
++-------------------------+
+| JobRunner worker thread|
+| - pops job |
+| - model->process(any) |
++-----------+-------------+
+ |
+ v
++-------------------------+
+| ClassificationModel:: |
+| process() |
+| |
+| 1) preprocessToTensor |
+| (stb_image decode + |
+| bilinear resize + |
+| ImageNet normalize)|
+| |
+| 2) ggml_backend_tensor_|
+| set(input, fp32buf) |
+| |
+| 3) ggml_backend_graph_ |
+| compute(backend, g) |
+| |
+| 4) ggml_backend_tensor_|
+| get(output, logits) |
+| |
+| 5) softmax (C++) |
+| |
+| 6) build sorted result |
++-----------+-------------+
+ |
+ | ClassifyOutput (std::any)
+ v
++-------------------------+
+| OutputQueue โ Output |
+| CallbackJs โ JS |
+| _outputCallback(event, |
+| data, error) |
++-----------+-------------+
+ |
+ v
++-------------------------+
+| JsClassifyOutputHandler|
+| ClassifyOutput โ JS |
+| Array<{label, confid.}>|
++-----------+-------------+
+ |
+ v
++-------------------------+
+| ImageClassifier (JS) |
+| _job.end() on terminal |
+| โ response.await() |
+| resolves with |
+| collected[0] |
++-----------+-------------+
+ |
+ v
++-------------------------+
+| Caller awaits result |
+| [{label, confidence}] |
++-------------------------+
+```
+
+## Error paths
+
+| Failure | Where | Surface behaviour |
+|---------------------------------------------|-----------------------------------------------|-------------------|
+| `null` / non-Buffer / non-Uint8Array input | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "Image 'content' is required and must be a Uint8Array / Buffer …" |
+| Empty buffer | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "Image 'content' buffer is empty" |
+| Unsupported format (BMP, text, …) | `ImagePreprocessor::isEncodedImage` (C++) | `StatusError(InvalidArgument)` — "Unsupported image format: expected JPEG or PNG …" |
+| Corrupted JPEG / PNG | `ImagePreprocessor::decodeToRgb` (C++) | `StatusError(InvalidArgument)` surfaced as JS `Error` |
+| Raw bytes + missing one of width/height/channels | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "Raw RGB input requires all of 'width', 'height', and 'channels' …" |
+| Raw bytes + non-positive width / height | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "must be a positive integer when passing raw RGB bytes" |
+| Raw bytes + channels ≠ 3 | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "must be exactly 3 (RGB) when passing raw RGB bytes" |
+| Buffer size mismatch (raw input) | `ImagePreprocessor::validateRawRgb` (C++) | `StatusError(InvalidArgument)` |
+| `topK ≤ 0` when provided | `AddonJs::runJob` (C++) | `StatusError(InvalidArgument)` — "must be a positive integer when provided" |
+| Missing `config.backendsDir` on Android | `ClassificationModel::load` (C++, Android) | `StatusError(InvalidArgument)` — "Configuration 'config.backendsDir' is required on Android"; `index.js` defaults it to `path.join(__dirname, 'prebuilds')` so this only fires when the addon is wired up by hand |
+| GGML CPU backend variant init failure | `ClassificationModel::load` (C++, Android) | `StatusError(InternalError)` — "Failed to find/init CPU backend device"; raised when `ggml_backend_load_all_from_path` couldn't enumerate any per-microarch variant under `//` |
+| `classify` before `load` | `ImageClassifier._classifyInternal` (JS) | `Error("Classifier not loaded. Call load() first.")` |
+| `classify` after `unload` | `ImageClassifier._classifyInternal` (JS) | same |
+| `unload` mid-classify | `ImageClassifier.unload` (JS) | the in-flight `classify()` promise rejects with `Error("Model was unloaded")` |
+| GGUF weights file missing | `ImageClassifier._load` (JS) | `Error("MobileNet GGUF weights not found at: …")` |
+| GGUF `mobilenet.num_classes` mismatch | `MobileNetGraph::loadWeights` (C++) | `StatusError(InvalidArgument)` — "does not match the addon's compiled-in class count" |
+| Compute graph output shape mismatch | `MobileNetGraph::buildGraph` (C++) | `StatusError(InternalError)` — defence-in-depth, never seen in practice |
+| `ggml_backend_graph_compute` non-success | `ClassificationModel::process` (C++) | `StatusError(InternalError)` |
+
+All errors are wrapped by the existing `qvac-lib-inference-addon-cpp`
+error infrastructure and reach the caller as structured JS Errors. Native
+code never aborts on bad input — this is validated by the error-case
+integration tests in `test/integration/error-cases.test.js` and by the
+preprocessor / model unit tests in `test/unit/*.cpp`.
+
+## Lifecycle
+
+```
+new ImageClassifier()
+ โ
+ โ .load()
+ โผ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ ClassificationModel::load โ
+โ backend init: โ
+โ desktop : ggml_backend_cpu_init() โ
+โ android : ggml_backend_load_all_from_ โ
+โ path(/) โ โ
+โ ggml_backend_dev_by_type(CPU) โ
+โ โ ggml_backend_dev_init โ
+โ weights = loadWeights(gguf, backend) โ
+โ graph = buildGraph(weights, backend) โ
+โ loaded = true โ
+โโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+ โ many .classify(โฆ) calls โ pixel data only per-call
+ โ
+ โ .unload()
+ โผ
+โโโโโโโโโโโโโโโโโโโโโโโโโ
+โ destroyInstance() โ
+โ ~AddonJs โ ~AddonCppโ
+โ ~ClassificationModelโ
+โ ggml_backend_free โ
+โโโโโโโโโโโโโโโโโโโโโโโโโ
+```
+
+Repeated load/unload cycles do not leak native handles — validated by
+`error-cases.test.js: load -> unload -> load cycles do not leak handles`.
diff --git a/packages/classification-ggml/docs/onnx-to-gguf-conversion.md b/packages/classification-ggml/docs/onnx-to-gguf-conversion.md
new file mode 100644
index 0000000000..d4ae4c6a8d
--- /dev/null
+++ b/packages/classification-ggml/docs/onnx-to-gguf-conversion.md
@@ -0,0 +1,167 @@
+# ONNX to GGUF Conversion (Public-safe)
+
+This document describes a public-safe conversion workflow for preparing
+MobileNetV3-Small weights for this addon.
+
+## 1) Export PyTorch to ONNX
+
+- Freeze model in eval mode.
+- Keep BatchNorm unfolded; the addon folds BN at load time from the raw GGUF parameters.
+- Export with fixed input shape `1x3x224x224`.
+
+## 2) Convert ONNX tensors into GGUF
+
+- Write tensor weights into a GGUF container.
+- Store model labels in metadata (`mobilenet.class_*` keys).
+- Use FP16 for shipped runtime weights.
+
+## 3) Verify numerics internally
+
+- Compare ONNX and GGUF runtime logits on a private validation corpus.
+- Ensure argmax agreement and tight per-logit tolerance.
+
+## 4) Integrate with addon
+
+- Replace `weights/mobilenetv3_3class_v3_fp16.gguf`.
+- Keep API output unchanged (`[{ label, confidence }]`).
+
+## Known pitfalls
+
+- BatchNorm epsilon must match training/export settings.
+- Depthwise conv paths require exact tensor shape/layout wiring.
+- Aggressive quantization can degrade CNN quality.
+# Converting a MobileNetV3-Small model to the GGUF format used by this addon
+
+This guide describes how to convert a retrained (or freshly exported)
+MobileNetV3-Small PyTorch model into the GGUF container consumed by
+`@qvac/classification-ggml`. It is intentionally minimal — the graph
+construction in `MobileNetGraph.cpp` is parameterised only by the block
+table `kBlocks` and the label metadata inside the GGUF, so swapping in
+new classes (or a different fine-tune) does not require any C++ changes
+as long as the architecture stays MobileNetV3-Small.
+
+> The bundled weights in `weights/mobilenetv3_3class_v3_fp16.gguf` were
+> produced by this exact pipeline. FP16 produces predictions identical
+> to the ONNX FP32 reference on representative inputs.
+
+## 1. Export from PyTorch to ONNX
+
+```python
+import torch
+from torchvision.models import mobilenet_v3_small
+
+model = mobilenet_v3_small(weights=None)
+# Replace the 1000-class head with an N-class head that matches your target classes.
+model.classifier[3] = torch.nn.Linear(1024, NUM_CLASSES)
+model.load_state_dict(torch.load("your_finetuned_weights.pth"))
+model.eval()
+
+dummy = torch.randn(1, 3, 224, 224)
+torch.onnx.export(
+ model,
+ dummy,
+ "mobilenetv3_small.onnx",
+ input_names=["input"],
+ output_names=["logits"],
+ opset_version=17,
+)
+```
+
+Notes:
+
+- Export the model in **inference mode**. `model.eval()` is mandatory:
+ it puts BatchNorm into running-statistics mode.
+- Do **not** fold BatchNorm into conv at ONNX export time. This addon
+ folds BN at load time inside the C++ code using the GGUF-supplied
+  `running_mean`, `running_var`, `weight`, `bias` — it needs the raw
+ BN parameters to exist in the file.
+
+## 2. Convert ONNX weights to GGUF
+
+The conversion script used for the bundled model produces a GGUF with
+the torchvision tensor naming preserved verbatim (`features.0.0.weight`,
+`features.1.block.0.0.weight`, …, `classifier.3.bias`). Any converter
+that emits the same tensor names and the required metadata keys works.
+
+Required GGUF **tensor layout**:
+
+- Conv kernels: `[KW, KH, IC, OC]` (ggml convention, matches
+ `torch.Tensor` export when dims are reversed).
+- Depthwise conv kernels: `[KW, KH, 1, C]`.
+- SE `fc1` / `fc2`: `[1, 1, IC, OC]` (1×1 convs, not Linear).
+- Classifier `classifier.0.weight`: `[576, 1024]`.
+- Classifier `classifier.3.weight`: `[1024, NUM_CLASSES]`.
+- BN tensors (`weight`, `bias`, `running_mean`, `running_var`): `[C]`
+ 1-D. `num_batches_tracked` is accepted but ignored.
+
+Required GGUF **metadata keys**:
+
+| Key | Type | Example |
+|-----|------|---------|
+| `general.architecture` | string | `"mobilenetv3-small"` |
+| `general.description` | string | `"MobileNetV3-Small 3-class FP16"` |
+| `mobilenet.architecture` | string | `"mobilenetv3_small"` |
+| `mobilenet.num_classes` | uint32 | `3` |
+| `mobilenet.image_size` | uint32 | `224` |
+| `mobilenet.class_0` | string | `"food"` |
+| `mobilenet.class_1` | string | `"report"` |
+| `mobilenet.class_2` | string | `"other"` |
+| `mobilenet.mean_r/g/b` | float32 | `0.485 / 0.456 / 0.406` |
+| `mobilenet.std_r/g/b` | float32 | `0.229 / 0.224 / 0.225` |
+| `mobilenet.bn_eps` | float32 | **`0.001`** (required — see below) |
+| `mobilenet.precision` | string | `"fp16"` or `"fp32"` |
+
+Quantization choice:
+
+- **FP16** is the target for shipping; FP16 produces numerically
+ identical predictions to the FP32 reference on representative inputs.
+- **FP32** is supported for debugging. Twice the file size, same output.
+- **INT8 / Q4_0 are destructive** for MobileNetV3-Small because
+  depthwise convolutions have only 9–25 weights per channel. Sub-8-bit
+ quantization introduces unacceptable error on these layers. Do not
+ ship quantized variants.
+
+## 3. Verify numerical equivalence
+
+Run the C++ addon against your internal set of reference images and
+compare logits to the ONNX reference:
+
+```
+| logit_difference | < 1e-4 per class, FP32
+| argmax agreement | must match the ONNX reference on every image
+```
+
+`test/integration/classify.test.js` and
+`test/unit/classification_model_test.cpp` cover the shape contract and
+the per-image argmax on the 6 public sample images shipped in
+`test/images/`. The per-image logit-diff check against ONNX is done
+with an external script during development (not bundled in this
+package because it requires PyTorch / onnxruntime) and must not embed
+any private validation data into the public package.
+
+## 4. Update the bundled weights
+
+1. Place the new `.gguf` in `packages/classification-ggml/weights/`.
+2. Keep the filename identical (`mobilenetv3_3class_v3_fp16.gguf`) or
+ update `DEFAULT_WEIGHTS_FILENAME` in `index.js`.
+3. Bump the package version (`package.json` + `CHANGELOG.md`).
+4. Re-run `npm run test:integration` and `npm run test:cpp`.
+
+## 5. Supporting a new block table (advanced)
+
+If you switch to a different MobileNet variant (V3-Large, V4, etc.),
+update `kBlocks` in `MobileNetGraph.hpp` to reflect the new
+expand/project channels, kernel sizes, strides, SE reducer sizes,
+and HardSwish/ReLU flags. The graph construction loop iterates over
+`kBlocks`; no other change is required as long as the GGUF tensor
+naming follows `features.