diff --git a/packages/transcription-parakeet/CHANGELOG.md b/packages/transcription-parakeet/CHANGELOG.md index b289f682c2..c1967ce543 100644 --- a/packages/transcription-parakeet/CHANGELOG.md +++ b/packages/transcription-parakeet/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **Android: re-enable GPU (Vulkan/OpenCL) for Parakeet — overlay-only, for device-farm validation (QVAC-20556).** Removes the `ParakeetModel::load` `__ANDROID__` guard that forced `useGPU=false`, widens the addon's Android backend-staging glob to bundle the `libqvac-speech-ggml-{vulkan,opencl}.so` MODULE libs (reversing the `[0.7.2]` CPU-only packaging), and lifts the Android early-pass skips in the `gpu-smoke` integration test so its strict backend assertion (`backendDevice=1`, `backendId` Vulkan/OpenCL) runs on real devices. Builds against `ggml-speech@44fd4817` + `parakeet-cpp@ed749556` (whisper.cpp `master`) via in-package overlay ports; the registry baseline and registry `version>=` pins are unchanged (the package manifest's own `parakeet-cpp` `version>=` constraints do move, from `2026-06-10` to `2026-06-12`, to match the overlay port's `version-date`). This is the inverse of the CPU-only workaround in #2525 — its purpose is to get an empirical AWS Device Farm signal (Pixel 9/Mali + S25/Adreno 830) on whether the latest speech stack drives those GPUs. **Not for merge as-is.** - Bumped the `parakeet-cpp` `version>=` constraint to `2026-06-10` (whisper.cpp `1c75d6e9`), which refreshes the bundled `ggml-speech` to the current speech-branch tip `bec032cd`. The registry baseline is left unchanged. The `parakeet-cpp` C++ tree is unchanged since the previous `128dae42` pin, so this only moves `ggml-speech`; prebuilds and the desktop RTF benchmark now build against the latest speech stack (QVAC-20614). - Performance reports now surface the desktop GPU hardware name. 
`test/integration/helpers.js` injects `bare-subprocess` into the shared performance reporter's `configure()` so `_detectGpu()` can shell out to nvidia-smi / vulkaninfo / system_profiler and populate `device.gpu` (e.g. "NVIDIA RTX 4000 SFF Ada") on GPU desktop runners (QVAC-20499). Mobile (Device Farm) reports continue to leave `device.gpu` null — the device name is the proxy there. - RTF benchmark now reports GGML backends. `test/benchmark/rtf-benchmark.test.js` resolves the requested GPU backend family to the parakeet.cpp cascade (Metal on darwin/ios, Vulkan on linux/win32, Vulkan/OpenCL on android) instead of the stale ONNX names (coreml/nnapi/auto-gpu), and now captures the *actual* backend the engine ran on via `stats.backendId` / `stats.backendDevice` (`labels.activeBackend`, `summary.backendId`). `scripts/perf-report/aggregate-parakeet-rtf.js` GPU-backend coverage map updated to the GGML set (vulkan/metal/opencl/cuda). diff --git a/packages/transcription-parakeet/CMakeLists.txt b/packages/transcription-parakeet/CMakeLists.txt index 09a705299d..429bcd2bdf 100644 --- a/packages/transcription-parakeet/CMakeLists.txt +++ b/packages/transcription-parakeet/CMakeLists.txt @@ -67,9 +67,9 @@ set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}") message(STATUS "Building qvac_lib_infer_parakeet with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'") # Collect every dynamically-loadable ggml backend surfaced by the -# ggml-speech port. On Android this is intentionally CPU-only while -# Parakeet forces useGPU=false; nothing is staged on Apple/static-only -# platforms. cmake-bare's +# ggml-speech port as an IMPORTED target. On Android the IMPORTED set is +# CPU-only (Vulkan/OpenCL ship as loose MODULE .so -- staged below); +# nothing is staged on Apple/static-only platforms. cmake-bare's # `add_bare_module(... EXPORTS INSTALL TARGET )` bundles each # IMPORTED ggml:: target into the per-arch prebuilds folder # next to the `.bare` module. 
Mirrors qvac/packages/llm-llamacpp/ @@ -83,8 +83,9 @@ message(STATUS "Building qvac_lib_infer_parakeet with BACKENDS_SUBDIR='${BACKEND # but ggml-config.cmake.in deliberately leaves them out of # GGML_AVAILABLE_BACKENDS' IMPORTED set (the loader is expected to # find them via `ggml_backend_load_all_from_path()` at runtime). -# We do not pick those GPU modules up on Android until their discovery -# path is stable on Mali/Adreno devices. +# We stage those GPU modules on Android too (loose-glob below) so the +# engine can select Vulkan/OpenCL per ggml's Adreno-tier policy when +# useGPU=true; ggml falls back to CPU on devices it can't drive. set(BACKEND_DL_LIBS "") if((ANDROID OR UNIX) AND NOT APPLE) foreach(_backend ${GGML_AVAILABLE_BACKENDS}) @@ -95,12 +96,11 @@ if((ANDROID OR UNIX) AND NOT APPLE) endif() # Loose-file pickup for MODULE backends that aren't surfaced as -# IMPORTED targets by find_package(ggml). On Android we intentionally -# stage CPU backends only while Parakeet forces useGPU=false; loading -# Vulkan/OpenCL during backend discovery is known to abort on some -# Mali/Adreno devices before inference can fall back to CPU. Linux keeps -# the wider glob as a fallback for any future hybrid build there. Glob -# runs at configure time -- the .so files come from ggml-speech's +# IMPORTED targets by find_package(ggml). On Android (as on Linux) we +# stage every ggml-speech backend .so -- CPU per-arch variants plus the +# Vulkan/OpenCL MODULE libs -- so `ggml_backend_load_all_from_path()` +# can discover and select a GPU backend at runtime when useGPU=true. +# Glob runs at configure time -- the .so files come from ggml-speech's # `file(INSTALL ...)` step in its portfile, which has already completed # by the time vcpkg hands control back to us. Apple / Windows # static-only configs return an empty list and skip the install rule. 
@@ -112,13 +112,8 @@ if((ANDROID OR UNIX) AND NOT APPLE) "ggml backend .so pickup -- the runtime may fail to find " "Android CPU backend modules.") else() - if(ANDROID) - file(GLOB BACKEND_DL_LOOSE_SOS - "${VCPKG_INSTALLED_PATH}/lib/libqvac-speech-ggml-cpu-*.so") - else() - file(GLOB BACKEND_DL_LOOSE_SOS - "${VCPKG_INSTALLED_PATH}/lib/libqvac-speech-ggml-*.so") - endif() + file(GLOB BACKEND_DL_LOOSE_SOS + "${VCPKG_INSTALLED_PATH}/lib/libqvac-speech-ggml-*.so") foreach(_lib IN LISTS BACKEND_DL_LOOSE_SOS) message(STATUS "qvac_lib_infer_parakeet: will stage ggml backend ${_lib}") endforeach() diff --git a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp index adf4a736a6..29c93bc31f 100644 --- a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp +++ b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp @@ -221,18 +221,6 @@ void ParakeetModel::cleanupTempFile_() { void ParakeetModel::load() { if (is_loaded_) return; - // Force useGPU to false in Android until Vulkan and OpenCL are stabilized -#ifdef __ANDROID__ - if (cfg_.useGPU) { - QLOG( - logger::Priority::WARNING, - "Parakeet: useGPU=true is currently ignored on Android " - "(GPU backends disabled at engine boundary pending Vulkan/Mali " - "and OpenCL/Adreno driver fixes); falling back to CPU."); - cfg_.useGPU = false; - } -#endif - QLOG(logger::Priority::INFO, "Loading Parakeet GGUF (modelType hint: " + std::to_string(static_cast(cfg_.modelType)) + ")"); diff --git a/packages/transcription-parakeet/test/integration/gpu-smoke.test.js b/packages/transcription-parakeet/test/integration/gpu-smoke.test.js index 090049f7be..2f4863aa78 100644 --- a/packages/transcription-parakeet/test/integration/gpu-smoke.test.js +++ b/packages/transcription-parakeet/test/integration/gpu-smoke.test.js @@ -193,7 +193,6 @@ async function runGpuModelTest 
(t, modelType, modelPath, audio, expectations) { } test('CTC GPU smoke — useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => { - if (platform === 'android') { t.pass('Android: GPU disabled at engine boundary pending Vulkan/Mali + OpenCL/Adreno upstream fixes'); return } const loggerBinding = setupJsLogger(binding) try { const modelPath = await loadGgufOrSkip(t, 'ctc') @@ -207,7 +206,6 @@ test('CTC GPU smoke — useGPU=true must engage the GPU backend on GPU-capable p }) test('TDT GPU smoke — useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => { - if (platform === 'android') { t.pass('Android: GPU disabled at engine boundary pending Vulkan/Mali + OpenCL/Adreno upstream fixes'); return } const loggerBinding = setupJsLogger(binding) try { const modelPath = await loadGgufOrSkip(t, 'tdt') @@ -225,7 +223,6 @@ test('TDT GPU smoke — useGPU=true must engage the GPU backend on GPU-capable p // the zero-token regression triggered by ggml-metal's Q-variant // mul_mv + bias/residual fusion on the EOU q8_0 joint network. 
test('EOU GPU smoke — useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => { - if (platform === 'android') { t.pass('Android: GPU disabled at engine boundary pending Vulkan/Mali + OpenCL/Adreno upstream fixes'); return } const loggerBinding = setupJsLogger(binding) try { const modelPath = await loadGgufOrSkip(t, 'eou') @@ -239,7 +236,6 @@ test('EOU GPU smoke — useGPU=true must engage the GPU backend on GPU-capable p }) test('Sortformer GPU smoke — useGPU=true must engage the GPU backend on GPU-capable platforms', { timeout: 600000, skip: NO_GPU }, async (t) => { - if (platform === 'android') { t.pass('Android: GPU disabled at engine boundary pending Vulkan/Mali + OpenCL/Adreno upstream fixes'); return } const loggerBinding = setupJsLogger(binding) try { const modelPath = await loadGgufOrSkip(t, 'sortformer') diff --git a/packages/transcription-parakeet/vcpkg-configuration.json b/packages/transcription-parakeet/vcpkg-configuration.json index 04e7630640..efaabb8d29 100644 --- a/packages/transcription-parakeet/vcpkg-configuration.json +++ b/packages/transcription-parakeet/vcpkg-configuration.json @@ -4,6 +4,9 @@ "baseline": "74d2dfd03d1c2c0767bac6d892ec43a2a0e29c10", "repository": "https://github.com/tetherto/qvac-registry-vcpkg.git" }, + "overlay-ports": [ + "./vcpkg-overlay-ports" + ], "registries": [ { "kind": "git", diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake new file mode 100644 index 0000000000..16ac7c0826 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake @@ -0,0 +1,37 @@ +# Detect the Vulkan version shipped with the Android NDK by parsing +# vulkan_core.h from the NDK sysroot. Sets `vulkan_version` in the +# caller's scope (e.g. "1.3.275"). 
+function(detect_ndk_vulkan_version)
+    # Output: `vulkan_version` ("<major>.<minor>.<header revision>") is set in
+    # the caller's scope. Every lookup or parse failure is a FATAL_ERROR.
+
+    # Fail early with an accurate message: with the variable unset, the glob
+    # below would search "/toolchains/..." and the problem would be reported
+    # as a missing host directory instead.
+    if("$ENV{ANDROID_NDK_HOME}" STREQUAL "")
+        message(FATAL_ERROR "ANDROID_NDK_HOME is not set; cannot locate vulkan_core.h in the NDK sysroot")
+    endif()
+
+    string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_system_name_lower)
+
+    # Look for a prebuilt toolchain directory whose name starts with the
+    # lower-cased host system name followed by "-"; the first match is used.
+    file(GLOB host_dirs LIST_DIRECTORIES true "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_system_name_lower}-*")
+    if(host_dirs)
+        list(GET host_dirs 0 host_dir)
+        get_filename_component(host_arch "${host_dir}" NAME)
+        set(vulkan_core_h "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_arch}/sysroot/usr/include/vulkan/vulkan_core.h")
+    else()
+        message(FATAL_ERROR "Could not find NDK host directory for ${host_system_name_lower}")
+    endif()
+
+    if(NOT EXISTS "${vulkan_core_h}")
+        message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}")
+    endif()
+
+    file(READ "${vulkan_core_h}" header_content)
+
+    # Third version component: the numeric value that follows VK_HEADER_VERSION.
+    string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" header_revision_match "${header_content}")
+    if(header_revision_match)
+        set(header_revision "${CMAKE_MATCH_1}")
+    else()
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}")
+    endif()
+
+    # First two version components come from VK_HEADER_VERSION_COMPLETE.
+    # VK_MAKE_API_VERSION takes (variant, major, minor, patch), so capture
+    # group 1 is the variant (unused) and groups 2 and 3 are major and minor.
+    string(REGEX MATCH "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), ([0-9]+), ([0-9]+)" api_version_match "${header_content}")
+    if(api_version_match)
+        set(major "${CMAKE_MATCH_2}")
+        set(minor "${CMAKE_MATCH_3}")
+        set(vulkan_version "${major}.${minor}.${header_revision}" PARENT_SCOPE)
+    else()
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}")
+    endif()
+endfunction()
diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch
new file mode 100644
index 0000000000..7906d49727
--- /dev/null
+++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch
@@ -0,0 +1,24 @@
+diff --git a/src/ggml-vulkan/CMakeLists.txt
b/src/ggml-vulkan/CMakeLists.txt +index 715a263a..3d92ac5d 100644 +--- a/src/ggml-vulkan/CMakeLists.txt ++++ b/src/ggml-vulkan/CMakeLists.txt +@@ -7,6 +7,7 @@ if (POLICY CMP0147) + endif() + + find_package(Vulkan COMPONENTS glslc REQUIRED) ++find_package(SPIRV-Headers QUIET) + + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Parallel build object files +@@ -87,6 +88,11 @@ if (Vulkan_FOUND) + ) + + target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan) ++ ++ if (TARGET SPIRV-Headers::SPIRV-Headers) ++ target_link_libraries(ggml-vulkan PRIVATE SPIRV-Headers::SPIRV-Headers) ++ endif() ++ + target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + + # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake new file mode 100644 index 0000000000..4bb6cce8d1 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -0,0 +1,179 @@ +# ggml-speech: tetherto/qvac-ext-ggml@speech HEAD 44fd4817 (PR #22, +# "bci-whispercpp OpenCL correctness on Android GPUs (Adreno + Samsung +# Xclipse)"). On top of the previous pin (1189e4ce, PR #19) it adds: +# +# 8bf760f4 ggml-vulkan: Parakeet on Adreno 740 Vulkan + Adreno guards +# e6578d01 ggml-vulkan: grow the descriptor-set pool on demand +# 44fd4817 ggml-opencl: bci-whispercpp OpenCL correctness on Android +# GPUs (Adreno + Samsung Xclipse) +# +# These land the Adreno-740 Vulkan + Android OpenCL-correctness fixes the +# tts-ggml (QVAC-20557) and transcription-parakeet (QVAC-20556) addons +# need to run their graphs on the GPU on Android. The Android backend +# packaging (GGML_BACKEND_DL=ON per-arch CPU variants + MODULE GPU .so) +# is unchanged from the previous pin. 
+
+# Fetch the pinned ggml-speech sources; SOURCE_PATH receives the extracted tree.
+vcpkg_from_github(
+    OUT_SOURCE_PATH SOURCE_PATH
+    REPO tetherto/qvac-ext-ggml
+    REF 44fd4817dd1dc5872053927200e2824b8a0ced86
+    SHA512 7d83537e5346fc1a1470e6b7ef191c55b02459ac139a841f39d319e7a5e11aea8e3ed0178cbbb0f0b9f2016e0b79c081411d31707368ccb637940fde3496ec14
+    HEAD_REF speech
+)
+
+# Backend switches default to OFF and are turned ON only by the matching
+# vcpkg feature; they are forwarded as -DGGML_* options at configure time.
+set(GGML_METAL OFF)
+set(GGML_VULKAN OFF)
+set(GGML_CUDA OFF)
+set(GGML_OPENCL OFF)
+set(GGML_METAL_FUSE_MV_BIAS OFF)
+
+if("metal" IN_LIST FEATURES)
+    set(GGML_METAL ON)
+endif()
+
+# Off by default: the chatterbox Q-variant mul_mv + bias/residual fusion
+# produces zero tokens on parakeet's EOU q8_0 joint network. Consumers
+# whose models stay clear of that pattern can opt in for the speedup.
+if("metal-fuse-mv-bias" IN_LIST FEATURES)
+    set(GGML_METAL_FUSE_MV_BIAS ON)
+endif()
+
+if("vulkan" IN_LIST FEATURES)
+    set(GGML_VULKAN ON)
+endif()
+
+set(GGML_CUDA_COMPILER_OPTION "")
+
+if("cuda" IN_LIST FEATURES)
+    set(GGML_CUDA ON)
+    # Try the fixed CUDA install locations first, then fall back to the
+    # default search; the fallback is REQUIRED so a missing nvcc stops here.
+    find_program(NVCC_EXECUTABLE nvcc
+        PATHS /usr/local/cuda/bin /usr/local/cuda-12.8/bin
+        NO_DEFAULT_PATH
+    )
+    if(NOT NVCC_EXECUTABLE)
+        find_program(NVCC_EXECUTABLE nvcc REQUIRED)
+    endif()
+    set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}")
+    message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}")
+endif()
+
+if("opencl" IN_LIST FEATURES)
+    set(GGML_OPENCL ON)
+endif()
+
+# Android + Vulkan: fetch the Vulkan-Headers release whose tag matches the
+# version detected from the NDK, and copy its *.hpp headers into src/.
+if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES)
+    # Quoted so a port directory containing spaces still resolves.
+    include("${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake")
+    detect_ndk_vulkan_version()
+    message(STATUS "NDK Vulkan version: ${vulkan_version}")
+
+    # Capture and check STATUS so a failed download is reported here with
+    # its cause instead of surfacing later as an extract/copy error.
+    file(DOWNLOAD
+        "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz"
+        "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz"
+        TLS_VERIFY ON
+        STATUS vulkan_headers_download_status
+    )
+    list(GET vulkan_headers_download_status 0 vulkan_headers_download_code)
+    if(NOT vulkan_headers_download_code EQUAL 0)
+        message(FATAL_ERROR
+            "Failed to download Vulkan-Headers v${vulkan_version}: ${vulkan_headers_download_status}")
+    endif()
+
+    # Only the *.hpp files are extracted from the archive.
+    file(ARCHIVE_EXTRACT
+        INPUT "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz"
+        DESTINATION "${SOURCE_PATH}"
+        PATTERNS "*.hpp"
+    )
+    file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/"
+        DESTINATION "${SOURCE_PATH}/src/")
+endif()
+
+# Per-platform -D options; the list is appended to the OPTIONS passed to
+# vcpkg_cmake_configure() further down in this portfile.
+set(PLATFORM_OPTIONS)
+
+# iOS: switch ggml's BLAS and Accelerate options off.
+# NOTE(review): the rationale is not recorded here -- confirm before changing.
+if(VCPKG_TARGET_IS_IOS)
+    list(APPEND PLATFORM_OPTIONS -DGGML_BLAS=OFF -DGGML_ACCELERATE=OFF)
+endif()
+
+# Hybrid Android backend mode: GPU backends as MODULE .so loaded at runtime
+# via dlopen, CPU built as per-arch MODULE .so variants (one per ARMv8.0/
+# 8.2/8.6/9.0/9.2 feature tier) also loaded at runtime via dlopen. The
+# downstream addon installs the resulting libqvac-speech-ggml-cpu-android_armv*
+# .so files alongside the .bare binary; the per-variant scoring in
+# ggml-cpu's `ggml_backend_cpu_aarch64_score` then picks the highest tier
+# the running device supports at first use. Pairs with the speech-branch
+# `ggml-backend: android per-arch CPU variant dlopen fallback` patch
+# (commit 9562ed04) so the variant lookup also succeeds when the consumer
+# APK keeps native .so files compressed (AGP `useLegacyPackaging=false`).
+# NOTE(review): both GGML_VULKAN_DISABLE_COOPMAT* switches are also set for
+# Android; the reason is not stated in this file -- confirm before removing.
+if(VCPKG_TARGET_IS_ANDROID)
+    list(APPEND PLATFORM_OPTIONS
+        -DGGML_BACKEND_DL=ON
+        -DGGML_CPU_ALL_VARIANTS=ON
+        -DGGML_CPU_REPACK=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT2=ON
+    )
+endif()
+
+# PR #13 (v0.10.2 sync) introduces an unconditional
+# `#include <spirv/...>` in src/ggml-vulkan/ggml-vulkan.cpp,
+# but the upstream ggml-vulkan CMakeLists.txt never finds spirv-headers nor
+# wires its include dir into the ggml-vulkan target. Apply a small patch
+# so it does (and depend on spirv-headers in vcpkg.json's vulkan feature).
+# NOTE(review): the exact header name was lost from this comment (the
+# angle-bracket text was stripped); presumably spirv/unified1/spirv.hpp --
+# confirm against ggml-vulkan.cpp.
+# TODO: push the equivalent fix upstream and drop this patch.
+# NOTE(review): vcpkg documents vcpkg_apply_patches() as deprecated in favour
+# of the PATCHES argument of vcpkg_from_github(); consider moving this patch
+# to the source-fetch call -- confirm against the vcpkg version in use.
+if("vulkan" IN_LIST FEATURES)
+    vcpkg_apply_patches(
+        SOURCE_PATH "${SOURCE_PATH}"
+        PATCHES
+            "${CMAKE_CURRENT_LIST_DIR}/patches/0001-ggml-vulkan-find-spirv-headers.patch"
+    )
+endif()
+
+# Configure with BUILD_SHARED_LIBS=OFF and tests/examples off. The backend
+# switches forward the GGML_* values derived from FEATURES earlier in this
+# portfile. GGML_LIB_OUTPUT_PREFIX=qvac-speech- matches the
+# libqvac-speech-ggml-* file names globbed after the install step.
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}"
+    OPTIONS
+        -DBUILD_SHARED_LIBS=OFF
+        -DGGML_NATIVE=OFF
+        -DGGML_CCACHE=OFF
+        -DGGML_OPENMP=OFF
+        -DGGML_LLAMAFILE=OFF
+        -DGGML_BUILD_TESTS=OFF
+        -DGGML_BUILD_EXAMPLES=OFF
+        -DGGML_METAL=${GGML_METAL}
+        -DGGML_VULKAN=${GGML_VULKAN}
+        -DGGML_CUDA=${GGML_CUDA}
+        -DGGML_OPENCL=${GGML_OPENCL}
+        -DGGML_METAL_FUSE_MV_BIAS=${GGML_METAL_FUSE_MV_BIAS}
+        -DGGML_LIB_OUTPUT_PREFIX=qvac-speech-
+        ${GGML_CUDA_COMPILER_OPTION}
+        ${PLATFORM_OPTIONS}
+)
+
+vcpkg_cmake_install()
+
+# Pick up the MODULE backend .so files ggml builds into the buildtree's
+# bin/ directory (Android dynamic-backend mode). cmake install() doesn't
+# move them by default.
+# Only the "-rel" build tree is globbed; an empty match installs nothing
+# and is not reported.
+if(VCPKG_TARGET_IS_ANDROID)
+    file(GLOB _backend_sos
+        "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin/libqvac-speech-ggml-*.so"
+    )
+    if(_backend_sos)
+        file(INSTALL ${_backend_sos} DESTINATION "${CURRENT_PACKAGES_DIR}/lib")
+    endif()
+endif()
+
+vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml)
+
+# Move ggml.pc from share/pkgconfig to lib/pkgconfig (release tree, then
+# debug tree) and delete the emptied share/pkgconfig directories before
+# vcpkg_fixup_pkgconfig() runs.
+if(EXISTS "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc")
+    file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/lib/pkgconfig")
+    file(RENAME "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc"
+        "${CURRENT_PACKAGES_DIR}/lib/pkgconfig/ggml.pc")
+endif()
+if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc")
+    file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig")
+    file(RENAME "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc"
+        "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig/ggml.pc")
+endif()
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/share/pkgconfig"
+    "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig")
+vcpkg_fixup_pkgconfig()
+
+# Remove include/ and share/ from the debug tree.
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include")
+file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share")
+
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled) + +file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage new file mode 100644 index 0000000000..9b23041f03 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage @@ -0,0 +1,10 @@ +The package ggml provides CMake integration: + + find_package(ggml CONFIG REQUIRED) + target_link_libraries(main PRIVATE ggml::ggml) + +Available vcpkg features: + metal - Metal GPU backend (macOS/iOS, auto-enabled on Apple) + vulkan - Vulkan GPU backend + cuda - CUDA GPU backend + opencl - OpenCL GPU backend diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json new file mode 100644 index 0000000000..8278e125b6 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json @@ -0,0 +1,57 @@ +{ + "name": "ggml-speech", + "version-date": "2026-06-11", + "description": "Speech-stack flavour of ggml from tetherto/qvac-ext-ggml@speech, including the ggml-org v0.10.2 sync (PR #13), the iOS Metal NULL-safety hardening from PR #10, GustavoA1604's Android per-arch CPU dlopen fallback (PR #11), the Mac M2 PAD test fix, and Adreno-aware Android OpenCL/Vulkan backend selection (PR #18, QVAC-18993): on Android the loader detects the GPU via Vulkan and only keeps OpenCL for Adreno > 700, CPU-only for Adreno 1..700, Vulkan/CPU for non-Adreno. Adds Adreno OpenCL elementwise kernels (sin/cos/abs/elu/leaky_relu) for Chatterbox S3Gen and robust Adreno-generation detection (PR #17, #19). 
Library filenames are prefixed libqvac-speech-ggml-* so they coexist with libqvac-ggml-* (fabric/llm) and libqvac-diffusion-ggml-* on the same Android device. Mutually exclusive with the regular ggml port in the same triplet -- pick one per build.", + "homepage": "https://github.com/tetherto/qvac-ext-ggml/tree/speech", + "license": "MIT", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + { + "name": "metal", + "platform": "osx | ios" + }, + { + "name": "opencl", + "platform": "android" + }, + { + "name": "vulkan", + "platform": "windows | linux | android" + } + ], + "features": { + "cuda": { + "description": "Enable CUDA GPU backend" + }, + "metal": { + "description": "Enable Metal GPU backend (macOS/iOS)" + }, + "metal-fuse-mv-bias": { + "description": "Compile in the Q-variant mul_mv + ADD(bias) [+ ADD(residual)] fusion in ggml-metal. Off by default: empirically produces zero tokens on parakeet's EOU q8_0 joint network. Opt in only after Metal A/B-validating your model against the fused path." + }, + "opencl": { + "description": "Enable OpenCL GPU backend", + "dependencies": [ + "opencl" + ] + }, + "vulkan": { + "description": "Enable Vulkan GPU backend", + "dependencies": [ + { + "name": "spirv-headers", + "version>=": "1.4.341.0" + } + ] + } + } +} diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/portfile.cmake new file mode 100644 index 0000000000..46553f8496 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/portfile.cmake @@ -0,0 +1,80 @@ +# parakeet-cpp: NVIDIA Parakeet ASR + Sortformer diarization in pure C++/ggml. +# Sourced from the parakeet-cpp/ subfolder of tetherto/qvac-ext-lib-whisper.cpp; +# consumes the ggml-speech port. +# +# Pinned at master HEAD ed749556. 
Since the previous pin (a34cb6da, PR #38 +# robust Adreno-generation parse) the parakeet-cpp sources are unchanged; +# this bump pairs the port with the newer ggml-speech (speech HEAD +# 44fd4817: Adreno-740 Vulkan + Android OpenCL correctness) so the +# transcription-parakeet addon (QVAC-20556) can run on the GPU on Android. + +set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled) +set(VCPKG_BUILD_TYPE release) + +vcpkg_from_github( + OUT_SOURCE_PATH WHISPER_CPP_SRC + REPO tetherto/qvac-ext-lib-whisper.cpp + REF ed749556dc6923caaf08db3a706d003599988765 + SHA512 03d297a3757983bba37ea78fe08e8626e2a333ffc8852963ce407832b4a65a39c5dc420813e1bc6e7004ee63f7f72b521c4b12bc502e20ef64f0559941485cf0 + HEAD_REF master +) + +set(SOURCE_PATH "${WHISPER_CPP_SRC}/parakeet-cpp") +if (NOT EXISTS "${SOURCE_PATH}/CMakeLists.txt") + message(FATAL_ERROR + "parakeet-cpp: ${SOURCE_PATH}/CMakeLists.txt missing; the parakeet-cpp/ " + "subfolder layout in qvac-ext-lib-whisper.cpp may have changed.") +endif() + +set(GGML_METAL OFF) +set(GGML_VULKAN OFF) +set(GGML_CUDA OFF) +set(GGML_OPENCL OFF) +if("metal" IN_LIST FEATURES) + set(GGML_METAL ON) +endif() +if("vulkan" IN_LIST FEATURES) + set(GGML_VULKAN ON) +endif() +if("cuda" IN_LIST FEATURES) + set(GGML_CUDA ON) +endif() +if("opencl" IN_LIST FEATURES) + set(GGML_OPENCL ON) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + DISABLE_PARALLEL_CONFIGURE + OPTIONS + -DPARAKEET_BUILD_LIBRARY=ON + -DPARAKEET_BUILD_EXECUTABLES=OFF + -DPARAKEET_BUILD_TESTS=OFF + -DPARAKEET_BUILD_EXAMPLES=OFF + -DPARAKEET_INSTALL=ON + -DPARAKEET_USE_SYSTEM_GGML=ON + -DBUILD_SHARED_LIBS=OFF + -DGGML_NATIVE=OFF + -DGGML_OPENMP=OFF + -DPARAKEET_OPENMP=OFF + -DGGML_CCACHE=OFF + -DPARAKEET_CCACHE=OFF + -DGGML_METAL=${GGML_METAL} + -DGGML_VULKAN=${GGML_VULKAN} + -DGGML_CUDA=${GGML_CUDA} + -DGGML_OPENCL=${GGML_OPENCL} +) + +vcpkg_cmake_install() + +vcpkg_cmake_config_fixup(PACKAGE_NAME parakeet-cpp CONFIG_PATH share/parakeet-cpp) + +file(REMOVE_RECURSE 
"${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") + +if (VCPKG_LIBRARY_LINKAGE MATCHES "static") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin") +endif() + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/vcpkg.json b/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/vcpkg.json new file mode 100644 index 0000000000..fffe516e01 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/parakeet-cpp/vcpkg.json @@ -0,0 +1,81 @@ +{ + "name": "parakeet-cpp", + "version-date": "2026-06-12", + "description": "Parakeet (NVIDIA FastConformer ASR + Sortformer diarization) inference in pure C++/ggml. Ships CTC, TDT, EOU and Sortformer engines under one Engine umbrella, plus the cross-engine StreamEvent API (VadStateChanged / EndOfTurn). Sourced from tetherto/qvac-ext-lib-whisper.cpp's parakeet-cpp/ subfolder; consumes the ggml-speech port.", + "homepage": "https://github.com/tetherto/qvac-ext-lib-whisper.cpp/tree/master/parakeet-cpp", + "license": "MIT", + "dependencies": [ + { + "name": "ggml-speech", + "version>=": "2026-06-11" + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + { + "name": "metal", + "platform": "osx | ios" + }, + { + "name": "opencl", + "platform": "android" + }, + { + "name": "vulkan", + "platform": "windows | linux | android" + } + ], + "features": { + "cuda": { + "description": "Enable CUDA GPU acceleration", + "dependencies": [ + { + "name": "ggml-speech", + "features": [ + "cuda" + ] + } + ] + }, + "metal": { + "description": "Enable Metal GPU acceleration (macOS / iOS)", + "dependencies": [ + { + "name": "ggml-speech", + "features": [ + "metal" + ] + } + ] + }, + "opencl": { + "description": "Enable OpenCL GPU acceleration (Android / 
Adreno)", + "dependencies": [ + { + "name": "ggml-speech", + "features": [ + "opencl" + ] + } + ] + }, + "vulkan": { + "description": "Enable Vulkan GPU acceleration", + "dependencies": [ + { + "name": "ggml-speech", + "features": [ + "vulkan" + ] + } + ] + } + } +} diff --git a/packages/transcription-parakeet/vcpkg.json b/packages/transcription-parakeet/vcpkg.json index f2e4279f1b..5f3b147c54 100644 --- a/packages/transcription-parakeet/vcpkg.json +++ b/packages/transcription-parakeet/vcpkg.json @@ -4,19 +4,19 @@ "dependencies": [ { "name": "parakeet-cpp", - "version>=": "2026-06-10", + "version>=": "2026-06-12", "features": ["metal"], "platform": "osx | ios" }, { "name": "parakeet-cpp", - "version>=": "2026-06-10", + "version>=": "2026-06-12", "features": ["vulkan", "opencl"], "platform": "android" }, { "name": "parakeet-cpp", - "version>=": "2026-06-10", + "version>=": "2026-06-12", "features": ["vulkan"], "platform": "!(osx | ios | android)" },