From 06fd50f5368d69db6b6905b14dfa17aca95075f0 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Mon, 29 Jul 2024 20:49:38 +0800
Subject: [PATCH] Add test about whisper large-v3 for .Net (#1187)

---
Note: this text sits between the "---" separator and the diffstat, so it is
not part of the commit message or of the change itself.

The line breaks and leading indentation of this patch were restored from a
copy in which all whitespace had been collapsed to single spaces. The line
counts of every hunk were re-checked against its "@@" header and against
the diffstat below (30 + 43 + 43 + 32 = 148 insertions). Indentation inside
the hunks and the position of blank context lines are reconstructions, so
compare with upstream commit 06fd50f5 before relying on them.

Summary of the change:
  * test-dot-net.sh removes the downloaded sherpa-onnx-* model directories
    after each example and runs the new run-whisper-large-v3.sh example.
  * Both .NET workflows gain steps that free disk space on ubuntu-latest
    runners and print disk usage with "df -h".
  * run-whisper-large-v3.sh is added; it clones the whisper large-v3 model
    repository and decodes three test wave files with the int8 and the
    non-quantized models.

 .github/scripts/test-dot-net.sh               | 30 +++++++++++++
 .github/workflows/test-dot-net-nuget.yaml     | 43 +++++++++++++++++++
 .github/workflows/test-dot-net.yaml           | 43 +++++++++++++++++++
 .../run-whisper-large-v3.sh                   | 32 ++++++++++++++
 4 files changed, 148 insertions(+)
 create mode 100755 dotnet-examples/offline-decode-files/run-whisper-large-v3.sh

diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
index 2b40669bd..c397fc0cd 100755
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -4,38 +4,68 @@ cd dotnet-examples/
 
 cd ./offline-decode-files
 ./run-sense-voice-ctc.sh
+rm -rf sherpa-onnx-*
+
 ./run-paraformer-itn.sh
+rm -rf sherpa-onnx-*
+
 ./run-telespeech-ctc.sh
+rm -rf sherpa-onnx-*
+
 ./run-nemo-ctc.sh
+rm -rf sherpa-onnx-*
+
 ./run-paraformer.sh
+rm -rf sherpa-onnx-*
+
 ./run-zipformer.sh
+rm -rf sherpa-onnx-*
+
 ./run-hotwords.sh
+rm -rf sherpa-onnx-*
+
 ./run-whisper.sh
+rm -rf sherpa-onnx-*
+
+./run-whisper-large-v3.sh
+rm -rf sherpa-onnx-*
+
 ./run-tdnn-yesno.sh
+rm -rf sherpa-onnx-*
 
 cd ../keyword-spotting-from-files
 ./run.sh
 
 cd ../online-decode-files
 ./run-transducer-itn.sh
+rm -rf sherpa-onnx-*
+
 ./run-zipformer2-ctc.sh
+rm -rf sherpa-onnx-*
+
 ./run-transducer.sh
+rm -rf sherpa-onnx-*
+
 ./run-paraformer.sh
+rm -rf sherpa-onnx-*
 
 cd ../vad-non-streaming-asr-paraformer
 ./run.sh
 
 cd ../offline-punctuation
 ./run.sh
+rm -rf sherpa-onnx-*
 
 cd ../speaker-identification
 ./run.sh
 
 cd ../streaming-hlg-decoding/
 ./run.sh
+rm -rf sherpa-onnx-*
 
 cd ../spoken-language-identification
 ./run.sh
+rm -rf sherpa-onnx-*
 
 cd ../offline-tts
 ./run-aishell3.sh
diff --git a/.github/workflows/test-dot-net-nuget.yaml b/.github/workflows/test-dot-net-nuget.yaml
index f472cb42c..d32582441 100644
--- a/.github/workflows/test-dot-net-nuget.yaml
+++ b/.github/workflows/test-dot-net-nuget.yaml
@@ -32,6 +32,49 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Free space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+
+      - name: Free more space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          # https://github.com/orgs/community/discussions/25678
+          cd /opt
+          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
+
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Free Disk Space (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: false
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+
+      - name: Check space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          df -h
+
       - name: Setup .NET 6.0
         uses: actions/setup-dotnet@v4
         with:
diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index ffab91f1b..6e32b155e 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -47,6 +47,49 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Free space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+
+      - name: Free more space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          # https://github.com/orgs/community/discussions/25678
+          cd /opt
+          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
+
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Free Disk Space (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: false
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+
+      - name: Check space
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: |
+          df -h
+
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2
         with:
diff --git a/dotnet-examples/offline-decode-files/run-whisper-large-v3.sh b/dotnet-examples/offline-decode-files/run-whisper-large-v3.sh
new file mode 100755
index 000000000..715ad029c
--- /dev/null
+++ b/dotnet-examples/offline-decode-files/run-whisper-large-v3.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./large-v3-encoder.int8.onnx ]; then
+  git lfs install
+
+  git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-large-v3
+
+  ls -lh sherpa-onnx-whisper-large-v3
+  cp -v sherpa-onnx-whisper-large-v3/*.onnx .
+  cp -v sherpa-onnx-whisper-large-v3/*.weights .
+  ls -lh
+fi
+
+dotnet run \
+  --num-threads=2 \
+  --whisper-encoder=./large-v3-encoder.int8.onnx \
+  --whisper-decoder=./large-v3-decoder.int8.onnx \
+  --tokens=./sherpa-onnx-whisper-large-v3/large-v3-tokens.txt \
+  --files ./sherpa-onnx-whisper-large-v3/test_wavs/0.wav \
+  ./sherpa-onnx-whisper-large-v3/test_wavs/1.wav \
+  ./sherpa-onnx-whisper-large-v3/test_wavs/8k.wav
+
+dotnet run \
+  --num-threads=2 \
+  --whisper-encoder=./large-v3-encoder.onnx \
+  --whisper-decoder=./large-v3-decoder.onnx \
+  --tokens=./sherpa-onnx-whisper-large-v3/large-v3-tokens.txt \
+  --files ./sherpa-onnx-whisper-large-v3/test_wavs/0.wav \
+  ./sherpa-onnx-whisper-large-v3/test_wavs/1.wav \
+  ./sherpa-onnx-whisper-large-v3/test_wavs/8k.wav