From 35c1b4a7a9376c6bb80ac461e1b2169be563f908 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 2 Aug 2024 10:21:24 +0800 Subject: [PATCH] Add ReazonSpeech Japanese pre-trained model (#1203) --- .github/scripts/test-offline-transducer.sh | 44 +++++++++++++++++++++ .github/workflows/linux.yaml | 21 +++++----- .github/workflows/macos.yaml | 9 +++-- scripts/apk/generate-vad-asr-apk-script.py | 19 +++++++++ sherpa-onnx/kotlin-api/OfflineRecognizer.kt | 13 ++++++ 5 files changed, 92 insertions(+), 14 deletions(-) diff --git a/.github/scripts/test-offline-transducer.sh b/.github/scripts/test-offline-transducer.sh index ee012de32..1bec7ec9b 100755 --- a/.github/scripts/test-offline-transducer.sh +++ b/.github/scripts/test-offline-transducer.sh @@ -15,6 +15,50 @@ echo "PATH: $PATH" which $EXE +log "------------------------------------------------------------------------" +log "Run zipformer transducer models (Japanese from ReazonSpeech) " +log "------------------------------------------------------------------------" +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2 + +name=$(basename $url) +curl -SL -O $url +tar xvf $name +rm $name +repo=$(basename -s .tar.bz2 $name) +ls -lh $repo + +cat $repo/test_wavs/*.txt + +log "test $repo" +test_wavs=( +1.wav +2.wav +3.wav +4.wav +5.wav +) + +for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --encoder=$repo/encoder-epoch-99-avg-1.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ + --joiner=$repo/joiner-epoch-99-avg-1.onnx \ + --debug=1 \ + $repo/test_wavs/$w +done + +for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ + --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ + --debug=1 \ + $repo/test_wavs/$w +done +rm -rf $repo + log "------------------------------------------------------------------------" log "Run Nemo fast conformer hybrid transducer ctc models (transducer branch)" log "------------------------------------------------------------------------" diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 3d5077312..67fe9be74 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -141,6 +141,17 @@ jobs: name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: install/* + - name: Test offline transducer + shell: bash + run: | + du -h -d1 . + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-transducer.sh + du -h -d1 . + + - name: Test offline CTC shell: bash run: | @@ -191,16 +202,6 @@ jobs: .github/scripts/test-online-ctc.sh du -h -d1 . - - name: Test offline transducer - shell: bash - run: | - du -h -d1 . - export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline - - .github/scripts/test-offline-transducer.sh - du -h -d1 . - - name: Test C API shell: bash run: | diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index 29b784ae5..e8c271106 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -113,21 +113,22 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx - - name: Test offline CTC + - name: Test offline transducer shell: bash run: | export PATH=$PWD/build/bin:$PATH export EXE=sherpa-onnx-offline - .github/scripts/test-offline-ctc.sh + .github/scripts/test-offline-transducer.sh - - name: Test offline transducer + + - name: Test offline CTC shell: bash run: | export PATH=$PWD/build/bin:$PATH export EXE=sherpa-onnx-offline - .github/scripts/test-offline-transducer.sh + .github/scripts/test-offline-ctc.sh - name: Test online CTC shell: bash diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index e48c240bd..45ffadca7 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -275,6 +275,25 @@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01", + idx=16, + lang="ja", + short_name="zipformer_reazonspeech", + cmd=""" + pushd $model_name + + rm -rfv test_wavs + + rm -fv encoder-epoch-99-avg-1.onnx + rm -fv decoder-epoch-99-avg-1.int8.onnx + rm -fv joiner-epoch-99-avg-1.onnx + + ls -lh + popd """, ), diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 27e8ed5b9..edabee57b 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -338,6 +338,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { tokens = "$modelDir/tokens.txt", ) } + + 16 -> { + val modelDir = "sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } } return null }