Skip to content

Commit

Permalink
Support spoken language identification with whisper (#694)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Mar 24, 2024
1 parent 3cdad9b commit 0d258dd
Show file tree
Hide file tree
Showing 36 changed files with 1,173 additions and 200 deletions.
98 changes: 98 additions & 0 deletions .github/scripts/test-spoken-language-identification.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash

set -e

log() {
  # Print a timestamped message prefixed with "(file:line:function)" of the
  # call site. All arguments are expanded through $*, and since the text is
  # emitted with `echo -e`, backslash escapes in it are interpreted.
  # Adapted from espnet.
  local caller_script caller_line caller_func
  caller_script=${BASH_SOURCE[1]##*/}
  caller_line=${BASH_LINENO[0]}
  caller_func=${FUNCNAME[1]}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_script}:${caller_line}:${caller_func}) $*"
}

# EXE names the binary under test and is exported by the calling workflow
# step; stop right away with a clear message when it is missing or empty.
: "${EXE:?EXE must be set to the name of the executable under test}"

echo "EXE is $EXE"
echo "PATH: $PATH"

# `command -v` is a shell builtin, so the lookup does not depend on an
# external `which` being installed. It prints the resolved path and returns
# non-zero when the executable cannot be found, which aborts the script
# under `set -e`.
command -v "$EXE"

# Whisper model sizes to test. Each entry is appended to
# "sherpa-onnx-whisper-" to form the name of the model repository.
names=(tiny base small medium)

# all_language_codes=bo,ml,tt,fa,sl,bg,sn,sr,tl,km,ln,mr,hr,eu,ro,ba,bs,pl,as,nn,sk,ko,oc,ar,uz,pa,tg,mk,kk,hi,ha,uk,is,de,el,ja,yo,be,so,tk,id,sa,ru,yi,en,am,cs,ne,la,sv,su,pt,mi,ca,sd,hy,haw,fi,et,kn,da,lt,it,nl,he,mg,ur,tr,af,br,bn,ta,no,my,si,mt,th,gl,sw,mn,jw,ms,ps,fo,ka,hu,zh,ht,az,fr,lo,sq,gu,cy,lv,es,lb,te,vi

log "Download test waves"

# Test clips fetched from the test_wavs directory of the
# k2-fsa/spoken-language-identification space.
waves=(
  ar-arabic.wav
  bg-bulgarian.wav
  cs-czech.wav
  da-danish.wav
  de-german.wav
  el-greek.wav
  en-english.wav
  es-spanish.wav
  fa-persian.wav
  fi-finnish.wav
  fr-french.wav
  hi-hindi.wav
  hr-croatian.wav
  id-indonesian.wav
  it-italian.wav
  ja-japanese.wav
  ko-korean.wav
  nl-dutch.wav
  no-norwegian.wav
  po-polish.wav
  pt-portuguese.wav
  ro-romanian.wav
  ru-russian.wav
  sk-slovak.wav
  sv-swedish.wav
  ta-tamil.wav
  tl-tagalog.wav
  tr-turkish.wav
  uk-ukrainian.wav
  zh-chinese.wav
)

for wav in "${waves[@]}"; do
  echo "Downloading $wav"
  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # server's error page under the .wav name, so a broken download stops the
  # script here rather than being fed to the executable later.
  curl --fail -SL -O "https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav"
  # -O stores the file under its remote name, i.e. $wav; list only that file
  # instead of re-listing every clip on each iteration.
  ls -lh -- "$wav"
done

# For every model size: fetch the model repository, run the executable on
# each test wave with the fp32 and then the int8 models, and delete the
# repository again before moving on to the next size.
for name in "${names[@]}"; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"

  repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-whisper-$name
  log "Start testing ${repo_url}"
  # Last path component of the URL is the directory `git clone` creates.
  repo=${repo_url##*/}
  log "Download pretrained model and test-data from $repo_url"

  # Clone without the LFS payloads, then pull only the ONNX model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo"
  git lfs pull --include "*.onnx"
  # git lfs pull --include "*.ort"
  ls -lh *.onnx
  popd

  for wav in "${waves[@]}"; do
    log "test fp32 onnx"

    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.onnx" \
      --whisper-decoder="$repo/${name}-decoder.onnx" \
      "$wav"

    log "test int8 onnx"

    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.int8.onnx" \
      --whisper-decoder="$repo/${name}-decoder.int8.onnx" \
      "$wav"
  done

  # ${repo:?} makes the shell abort instead of running rm with an empty path.
  rm -rf -- "${repo:?}"
done
1 change: 0 additions & 1 deletion .github/workflows/build-wheels-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ jobs:
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
shell: bash
with:
max_attempts: 20
timeout_seconds: 200
Expand Down
17 changes: 1 addition & 16 deletions .github/workflows/build-wheels-macos-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,12 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]

steps:
- uses: actions/checkout@v4

# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
if: matrix.python-version == 'cp37'
uses: pypa/[email protected]
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'"
CIBW_ARCHS: "arm64"
CIBW_BUILD_VERBOSITY: 3

# Don't repair macOS wheels
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

- name: Build wheels
if: matrix.python-version != 'cp37'
uses: pypa/[email protected]
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/linux-gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ jobs:
file build/bin/sherpa-onnx
readelf -d build/bin/sherpa-onnx
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |
Expand All @@ -116,6 +124,7 @@ jobs:
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
shell: bash
run: |
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ jobs:
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
path: build/bin/*

- name: Test spoken language identification
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test transducer kws
shell: bash
run: |
Expand All @@ -140,6 +149,7 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test spoken language identification
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-spoken-language-identification.sh
- name: Test transducer kws
shell: bash
run: |
Expand Down Expand Up @@ -135,6 +144,7 @@ jobs:
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows-x64-cuda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-language-identification.exe
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test spoken language identification
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-language-identification.exe
.github/scripts/test-spoken-language-identification.sh
- name: Test online CTC
shell: bash
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
# - name: Test spoken language identification
# shell: bash
# run: |
# export PATH=$PWD/build/bin/Release:$PATH
# export EXE=sherpa-onnx-offline-language-identification.exe
#
# .github/scripts/test-spoken-language-identification.sh

- name: Test online CTC
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.9.13")
set(SHERPA_ONNX_VERSION "1.9.14")

# Disable warning about
#
Expand Down
77 changes: 45 additions & 32 deletions cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,50 @@ def enable_alsa():
return build_alsa and is_linux() and (is_arm64() or is_x86())


def get_binaries():
    """Return the list of binary file names for the current platform.

    The command-line tools are always included. The ALSA tools are appended
    when ``enable_alsa()`` is true, and the Windows ``.dll``/``.lib`` files
    are appended when ``is_windows()`` is true. Executable names are listed
    without a platform suffix.
    """
    common_tools = (
        "sherpa-onnx",
        "sherpa-onnx-keyword-spotter",
        "sherpa-onnx-microphone",
        "sherpa-onnx-microphone-offline",
        "sherpa-onnx-microphone-offline-speaker-identification",
        "sherpa-onnx-offline",
        "sherpa-onnx-offline-language-identification",
        "sherpa-onnx-offline-tts",
        "sherpa-onnx-offline-tts-play",
        "sherpa-onnx-offline-websocket-server",
        "sherpa-onnx-online-websocket-client",
        "sherpa-onnx-online-websocket-server",
        "sherpa-onnx-vad-microphone",
        "sherpa-onnx-vad-microphone-offline-asr",
    )
    alsa_tools = (
        "sherpa-onnx-alsa",
        "sherpa-onnx-alsa-offline",
        "sherpa-onnx-alsa-offline-speaker-identification",
        "sherpa-onnx-offline-tts-play-alsa",
    )
    windows_libs = (
        "espeak-ng.dll",
        "kaldi-decoder-core.dll",
        "kaldi-native-fbank-core.dll",
        "onnxruntime.dll",
        "piper_phonemize.dll",
        "sherpa-onnx-c-api.dll",
        "sherpa-onnx-core.dll",
        "sherpa-onnx-fst.lib",
        "sherpa-onnx-kaldifst-core.lib",
        "sherpa-onnx-portaudio.dll",
        "ucd.dll",
    )

    selected = list(common_tools)
    # Same evaluation order as before: the ALSA check first, then Windows.
    if enable_alsa():
        selected.extend(alsa_tools)
    if is_windows():
        selected.extend(windows_libs)
    return selected


try:
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

Expand Down Expand Up @@ -150,38 +194,7 @@ def build_extension(self, ext: setuptools.extension.Extension):
suffix = ".exe" if is_windows() else ""
# Remember to also change setup.py

binaries = ["sherpa-onnx"]
binaries += ["sherpa-onnx-keyword-spotter"]
binaries += ["sherpa-onnx-offline"]
binaries += ["sherpa-onnx-microphone"]
binaries += ["sherpa-onnx-microphone-offline"]
binaries += ["sherpa-onnx-microphone-offline-speaker-identification"]
binaries += ["sherpa-onnx-online-websocket-server"]
binaries += ["sherpa-onnx-offline-websocket-server"]
binaries += ["sherpa-onnx-online-websocket-client"]
binaries += ["sherpa-onnx-vad-microphone"]
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]
binaries += ["sherpa-onnx-offline-tts-play"]

if enable_alsa():
binaries += ["sherpa-onnx-alsa"]
binaries += ["sherpa-onnx-alsa-offline"]
binaries += ["sherpa-onnx-offline-tts-play-alsa"]
binaries += ["sherpa-onnx-alsa-offline-speaker-identification"]

if is_windows():
binaries += ["kaldi-native-fbank-core.dll"]
binaries += ["sherpa-onnx-c-api.dll"]
binaries += ["sherpa-onnx-core.dll"]
binaries += ["sherpa-onnx-portaudio.dll"]
binaries += ["onnxruntime.dll"]
binaries += ["piper_phonemize.dll"]
binaries += ["espeak-ng.dll"]
binaries += ["ucd.dll"]
binaries += ["kaldi-decoder-core.dll"]
binaries += ["sherpa-onnx-fst.lib"]
binaries += ["sherpa-onnx-kaldifst-core.lib"]
binaries = get_binaries()

for f in binaries:
suffix = "" if (".dll" in f or ".lib" in f) else suffix
Expand Down
Loading

0 comments on commit 0d258dd

Please sign in to comment.