From 72ea103e9b2f56c052e7c400a8c965c143153f31 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 6 Feb 2024 17:21:45 +0800 Subject: [PATCH] Add WebAssembly (#300) --- .github/workflows/build-wasm-simd.yaml | 107 +++++++++ .github/workflows/go.yaml | 13 +- .github/workflows/test-pip-install.yaml | 2 +- CMakeLists.txt | 7 +- build-wasm-simd.sh | 75 ++++++ wasm/CMakeLists.txt | 56 +++++ wasm/app.js | 299 ++++++++++++++++++++++++ wasm/assets/.gitignore | 4 + wasm/assets/.gitkeep | 0 wasm/assets/README.md | 34 +++ wasm/index.html | 40 ++++ wasm/sherpa-ncnn-wasm-main.cc | 38 +++ wasm/sherpa-ncnn.js | 272 +++++++++++++++++++++ 13 files changed, 941 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/build-wasm-simd.yaml create mode 100755 build-wasm-simd.sh create mode 100644 wasm/CMakeLists.txt create mode 100644 wasm/app.js create mode 100644 wasm/assets/.gitignore create mode 100644 wasm/assets/.gitkeep create mode 100644 wasm/assets/README.md create mode 100644 wasm/index.html create mode 100644 wasm/sherpa-ncnn-wasm-main.cc create mode 100644 wasm/sherpa-ncnn.js diff --git a/.github/workflows/build-wasm-simd.yaml b/.github/workflows/build-wasm-simd.yaml new file mode 100644 index 00000000..d84f4578 --- /dev/null +++ b/.github/workflows/build-wasm-simd.yaml @@ -0,0 +1,107 @@ +name: wasm-simd + +on: + push: + branches: + - master + paths: + - '.github/workflows/build-wasm-simd.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-ncnn/csrc/*' + - 'wasm/*' + pull_request: + branches: + - master + paths: + - '.github/workflows/build-wasm-simd.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-ncnn/csrc/*' + - 'wasm/*' + + release: + types: + - published + + workflow_dispatch: + inputs: + release: + description: "Whether to release" + type: boolean + +env: + RELEASE: + |- # Release if there is a release tag name or a release flag in workflow_dispatch + ${{ github.event.release.tag_name != '' || github.event.inputs.release == 'true' }} + +concurrency: + group: linux-${{ github.ref }} + cancel-in-progress: true + +jobs: + wasm-simd: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install emsdk + uses: mymindstorm/setup-emsdk@v14 + + - name: View emsdk version + shell: bash + run: | + emcc -v + echo "--------------------" + emcc --check + + - name: Download model files + shell: bash + run: | + cd wasm/assets + ls -lh + echo "----------" + wget -q https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + tar xvf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + mv -v sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/*pnnx.ncnn.param . + mv -v sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/*pnnx.ncnn.bin . + mv -v sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt . + + rm -rf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 + rm -v sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + + ls -lh + + - name: Build sherpa-ncnn for WebAssembly + shell: bash + run: | + ./build-wasm-simd.sh + + - name: collect files + shell: bash + run: | + SHERPA_NCNN_VERSION=v$(grep "SHERPA_NCNN_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + mv build-wasm-simd/install/bin/wasm sherpa-ncnn-wasm-simd + ls -lh sherpa-ncnn-wasm-simd + tar cjfv sherpa-ncnn-wasm-simd-${SHERPA_NCNN_VERSION}.tar.bz2 ./sherpa-ncnn-wasm-simd + + - name: Upload wasm files + uses: actions/upload-artifact@v4 + with: + name: sherpa-ncnn-wasm-simd + path: ./sherpa-ncnn-wasm-simd-*.tar.bz2 + + - name: Release wasm files + if: env.RELEASE == 'true' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./sherpa-ncnn-wasm-simd*.tar.bz2 diff --git a/.github/workflows/go.yaml b/.github/workflows/go.yaml index a3a48429..d4356a7e 100644 --- a/.github/workflows/go.yaml +++ b/.github/workflows/go.yaml @@ -48,11 +48,18 @@ jobs: go env GOPATH go env GOARCH - - name: Set up MinGW - if: matrix.os == 'windows-latest' - uses: egor-tensin/setup-mingw@v2 + - name: Set up MinGW for x64 + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' + uses: csukuangfj/setup-mingw@v2.2.1 + with: + platform: ${{ matrix.arch }} + + - name: Set up MinGW for x86 + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' + uses: csukuangfj/setup-mingw@v2.2.1 with: platform: ${{ matrix.arch }} + version: '12.2.0' - name: Show gcc if: matrix.os == 'windows-latest' diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index 7254b988..42a2851b 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -27,7 +27,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0953fd03..b7c6f1c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-ncnn) -set(SHERPA_NCNN_VERSION "2.1.6") +set(SHERPA_NCNN_VERSION "2.1.7") # Disable warning about # @@ -40,6 +40,7 @@ option(SHERPA_NCNN_ENABLE_JNI "Whether to build JNI internface" OFF) option(SHERPA_NCNN_ENABLE_BINARY "Whether to build the binary sherpa-ncnn" ON) option(SHERPA_NCNN_ENABLE_TEST "Whether to build tests" OFF) option(SHERPA_NCNN_ENABLE_C_API "Whether to build C API" ON) +option(SHERPA_NCNN_ENABLE_WASM "Whether to enable WASM" OFF) option(SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE "Whether to generate-int8-scale-table" ON) option(SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES "Whether to enable ffmpeg-examples" OFF) @@ -137,7 +138,6 @@ if(WIN32 AND MSVC) endforeach() endif() - include(kaldi-native-fbank) include(ncnn) @@ -159,6 +159,9 @@ if(SHERPA_NCNN_ENABLE_C_API AND SHERPA_NCNN_ENABLE_BINARY) add_subdirectory(c-api-examples) endif() +if(SHERPA_NCNN_ENABLE_WASM) + add_subdirectory(wasm) +endif() set(SHERPA_NCNN_PKG_CONFIG_EXTRA_LIBS) diff --git a/build-wasm-simd.sh b/build-wasm-simd.sh new file mode 100755 index 00000000..145fe489 --- /dev/null +++ b/build-wasm-simd.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Copyright (c) 2024 Xiaomi Corporation +# +# This script is to build sherpa-ncnn for WebAssembly +# +# See also +# https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-webassembly +# +# Please refer to +# https://k2-fsa.github.io/sherpa/ncnn/wasm/index.html +# for more details. + +set -ex + +if [ x"$EMSCRIPTEN" == x"" ]; then + if ! command -v emcc &> /dev/null; then + echo "Please install emscripten first" + echo "" + echo "You can use the following commands to install it:" + echo "" + echo "git clone https://github.com/emscripten-core/emsdk.git" + echo "cd emsdk" + echo "git pull" + echo "./emsdk install latest" + echo "./emsdk activate latest" + echo "source ./emsdk_env.sh" + exit 1 + else + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) + fi +fi + +export EMSCRIPTEN=$EMSCRIPTEN +echo "EMSCRIPTEN: $EMSCRIPTEN" +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" + echo "Please make sure you have installed emsdk correctly" + exit 1 +fi + +mkdir -p build-wasm-simd +pushd build-wasm-simd + +export SHERPA_NCNN_IS_USING_BUILD_WASM_SH=ON + +cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ + -DNCNN_THREADS=OFF \ + -DNCNN_OPENMP=OFF \ + -DNCNN_SIMPLEOMP=OFF \ + -DNCNN_RUNTIME_CPU=OFF \ + -DNCNN_SSE2=ON \ + -DNCNN_AVX2=OFF \ + -DNCNN_AVX=OFF \ + -DNCNN_BUILD_TOOLS=OFF \ + -DNCNN_BUILD_EXAMPLES=OFF \ + -DNCNN_BUILD_BENCHMARK=OFF \ + \ + -DSHERPA_NCNN_ENABLE_WASM=ON \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_NCNN_ENABLE_PYTHON=OFF \ + -DSHERPA_NCNN_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_NCNN_ENABLE_JNI=OFF \ + -DSHERPA_NCNN_ENABLE_BINARY=OFF \ + -DSHERPA_NCNN_ENABLE_TEST=OFF \ + -DSHERPA_NCNN_ENABLE_C_API=ON \ + -DSHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE=OFF \ + -DSHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES=OFF \ + .. + +make -j2 +make install +ls -lh install/bin/wasm diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt new file mode 100644 index 00000000..4cc93276 --- /dev/null +++ b/wasm/CMakeLists.txt @@ -0,0 +1,56 @@ +if(NOT $ENV{SHERPA_NCNN_IS_USING_BUILD_WASM_SH}) + message(FATAL_ERROR "Please use ./build-wasm.sh to build for wasm") +endif() + +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin") + message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin does not exist") + message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") +endif() + +set(exported_functions + AcceptWaveform + CreateRecognizer + CreateStream + Decode + DestroyRecognizer + DestroyResult + DestroyStream + GetResult + InputFinished + IsEndpoint + IsReady + Reset + ) +set(mangled_exported_functions) +foreach(x IN LISTS exported_functions) + list(APPEND mangled_exported_functions "_${x}") +endforeach() + +list(JOIN mangled_exported_functions "," all_exported_functions) + +include_directories(${CMAKE_SOURCE_DIR}) +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB ") +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") +string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ") +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") +set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") + +add_executable(sherpa-ncnn-wasm-main sherpa-ncnn-wasm-main.cc) +target_link_libraries(sherpa-ncnn-wasm-main sherpa-ncnn-core sherpa-ncnn-c-api) +install(TARGETS sherpa-ncnn-wasm-main DESTINATION bin/wasm) + +install( + FILES + "sherpa-ncnn.js" + "app.js" + "index.html" + "$/sherpa-ncnn-wasm-main.js" + "$/sherpa-ncnn-wasm-main.wasm" + "$/sherpa-ncnn-wasm-main.data" + DESTINATION + bin/wasm +) diff --git a/wasm/app.js b/wasm/app.js new file mode 100644 index 00000000..cb27db97 --- /dev/null +++ b/wasm/app.js @@ -0,0 +1,299 @@ +// This file copies and modifies code +// from https://mdn.github.io/web-dictaphone/scripts/app.js +// and https://gist.github.com/meziantou/edb7217fddfbb70e899e + +const startBtn = document.getElementById('startBtn'); +const stopBtn = document.getElementById('stopBtn'); +const clearBtn = document.getElementById('clearBtn'); +const hint = document.getElementById('hint'); +const soundClips = document.getElementById('sound-clips'); + +let textArea = document.getElementById('results'); + +let lastResult = ''; +let resultList = []; + +clearBtn.onclick = function() { + resultList = []; + textArea.value = getDisplayResult(); + textArea.scrollTop = textArea.scrollHeight; // auto scroll +}; + +function getDisplayResult() { + let i = 0; + let ans = ''; + for (let s in resultList) { + if (resultList[s] == '') { + continue; + } + + ans += '' + i + ': ' + resultList[s] + '\n'; + i += 1; + } + + if (lastResult.length > 0) { + ans += '' + i + ': ' + lastResult + '\n'; + } + return ans; +} + + +Module = {}; +Module.onRuntimeInitialized = function() { + console.log('inited!'); + hint.innerText = 'Model loaded! Please click start'; + + startBtn.disabled = false; + + recognizer = createRecognizer(); + console.log('recognizer is created!', recognizer); +}; + +let audioCtx; +let mediaStream; + +let expectedSampleRate = 16000; +let recordSampleRate; // the sampleRate of the microphone +let recorder = null; // the microphone +let leftchannel = []; // TODO: Use a single channel + +let recordingLength = 0; // number of samples so far + +let recognizer = null; +let recognizer_stream = null; + +if (navigator.mediaDevices.getUserMedia) { + console.log('getUserMedia supported.'); + + // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia + const constraints = {audio: true}; + + let onSuccess = function(stream) { + if (!audioCtx) { + audioCtx = new AudioContext({sampleRate: 16000}); + } + console.log(audioCtx); + recordSampleRate = audioCtx.sampleRate; + console.log('sample rate ' + recordSampleRate); + + // creates an audio node from the microphone incoming stream + mediaStream = audioCtx.createMediaStreamSource(stream); + console.log('media stream', mediaStream); + + // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor + // bufferSize: the onaudioprocess event is called when the buffer is full + var bufferSize = 4096; + var numberOfInputChannels = 1; + var numberOfOutputChannels = 2; + if (audioCtx.createScriptProcessor) { + recorder = audioCtx.createScriptProcessor( + bufferSize, numberOfInputChannels, numberOfOutputChannels); + } else { + recorder = audioCtx.createJavaScriptNode( + bufferSize, numberOfInputChannels, numberOfOutputChannels); + } + console.log('recorder', recorder); + + recorder.onaudioprocess = function(e) { + let samples = new Float32Array(e.inputBuffer.getChannelData(0)) + samples = downsampleBuffer(samples, expectedSampleRate); + + if (recognizer_stream == null) { + recognizer_stream = recognizer.createStream(); + } + + recognizer_stream.acceptWaveform(expectedSampleRate, samples); + while (recognizer.isReady(recognizer_stream)) { + recognizer.decode(recognizer_stream); + } + + let isEndpoint = recognizer.isEndpoint(recognizer_stream); + let result = recognizer.getResult(recognizer_stream); + + + if (result.length > 0 && lastResult != result) { + lastResult = result; + } + + if (isEndpoint) { + if (lastResult.length > 0) { + resultList.push(lastResult); + lastResult = ''; + } + recognizer.reset(recognizer_stream); + } + + textArea.value = getDisplayResult(); + textArea.scrollTop = textArea.scrollHeight; // auto scroll + + let buf = new Int16Array(samples.length); + for (var i = 0; i < samples.length; ++i) { + let s = samples[i]; + if (s >= 1) + s = 1; + else if (s <= -1) + s = -1; + + samples[i] = s; + buf[i] = s * 32767; + } + + leftchannel.push(buf); + recordingLength += bufferSize; + }; + + startBtn.onclick = function() { + mediaStream.connect(recorder); + recorder.connect(audioCtx.destination); + + console.log('recorder started'); + + stopBtn.disabled = false; + startBtn.disabled = true; + }; + + stopBtn.onclick = function() { + console.log('recorder stopped'); + + // stopBtn recording + recorder.disconnect(audioCtx.destination); + mediaStream.disconnect(recorder); + + startBtn.style.background = ''; + startBtn.style.color = ''; + // mediaRecorder.requestData(); + + stopBtn.disabled = true; + startBtn.disabled = false; + + var clipName = new Date().toISOString(); + + const clipContainer = document.createElement('article'); + const clipLabel = document.createElement('p'); + const audio = document.createElement('audio'); + const deleteButton = document.createElement('button'); + clipContainer.classList.add('clip'); + audio.setAttribute('controls', ''); + deleteButton.textContent = 'Delete'; + deleteButton.className = 'delete'; + + clipLabel.textContent = clipName; + + clipContainer.appendChild(audio); + + clipContainer.appendChild(clipLabel); + clipContainer.appendChild(deleteButton); + soundClips.appendChild(clipContainer); + + audio.controls = true; + let samples = flatten(leftchannel); + const blob = toWav(samples); + + leftchannel = []; + const audioURL = window.URL.createObjectURL(blob); + audio.src = audioURL; + console.log('recorder stopped'); + + deleteButton.onclick = function(e) { + let evtTgt = e.target; + evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode); + }; + + clipLabel.onclick = function() { + const existingName = clipLabel.textContent; + const newClipName = prompt('Enter a new name for your sound clip?'); + if (newClipName === null) { + clipLabel.textContent = existingName; + } else { + clipLabel.textContent = newClipName; + } + }; + }; + }; + + let onError = function(err) { + console.log('The following error occured: ' + err); + }; + + navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError); +} else { + console.log('getUserMedia not supported on your browser!'); + alert('getUserMedia not supported on your browser!'); +} + + +// this function is copied/modified from +// https://gist.github.com/meziantou/edb7217fddfbb70e899e +function flatten(listOfSamples) { + let n = 0; + for (let i = 0; i < listOfSamples.length; ++i) { + n += listOfSamples[i].length; + } + let ans = new Int16Array(n); + + let offset = 0; + for (let i = 0; i < listOfSamples.length; ++i) { + ans.set(listOfSamples[i], offset); + offset += listOfSamples[i].length; + } + return ans; +} + +// this function is copied/modified from +// https://gist.github.com/meziantou/edb7217fddfbb70e899e +function toWav(samples) { + let buf = new ArrayBuffer(44 + samples.length * 2); + var view = new DataView(buf); + + // http://soundfile.sapp.org/doc/WaveFormat/ + // F F I R + view.setUint32(0, 0x46464952, true); // chunkID + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize + // E V A W + view.setUint32(8, 0x45564157, true); // format + // + // t m f + view.setUint32(12, 0x20746d66, true); // subchunk1ID + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM + view.setUint32(20, 1, true); // audioFormat, 1 for PCM + view.setUint16(22, 1, true); // numChannels: 1 channel + view.setUint32(24, expectedSampleRate, true); // sampleRate + view.setUint32(28, expectedSampleRate * 2, true); // byteRate + view.setUint16(32, 2, true); // blockAlign + view.setUint16(34, 16, true); // bitsPerSample + view.setUint32(36, 0x61746164, true); // Subchunk2ID + view.setUint32(40, samples.length * 2, true); // subchunk2Size + + let offset = 44; + for (let i = 0; i < samples.length; ++i) { + view.setInt16(offset, samples[i], true); + offset += 2; + } + + return new Blob([view], {type: 'audio/wav'}); +} + +// this function is copied from +// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46 +function downsampleBuffer(buffer, exportSampleRate) { + if (exportSampleRate === recordSampleRate) { + return buffer; + } + var sampleRateRatio = recordSampleRate / exportSampleRate; + var newLength = Math.round(buffer.length / sampleRateRatio); + var result = new Float32Array(newLength); + var offsetResult = 0; + var offsetBuffer = 0; + while (offsetResult < result.length) { + var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio); + var accum = 0, count = 0; + for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) { + accum += buffer[i]; + count++; + } + result[offsetResult] = accum / count; + offsetResult++; + offsetBuffer = nextOffsetBuffer; + } + return result; +}; diff --git a/wasm/assets/.gitignore b/wasm/assets/.gitignore new file mode 100644 index 00000000..0080cbfd --- /dev/null +++ b/wasm/assets/.gitignore @@ -0,0 +1,4 @@ +*.ncnn.param +*.ncnn.bin +tokens.txt +*.wav diff --git a/wasm/assets/.gitkeep b/wasm/assets/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/wasm/assets/README.md b/wasm/assets/README.md new file mode 100644 index 00000000..592f4dae --- /dev/null +++ b/wasm/assets/README.md @@ -0,0 +1,34 @@ +# Introduction + +Please refer to +https://github.com/k2-fsa/sherpa-ncnn/releases/tag/models +to download a model. + +The following is an example: +``` +cd /path/to/this/directory +wget -q https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +tar xf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +rm sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +mv sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/*pnnx.ncnn.param . +mv sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/*pnnx.ncnn.bin . +mv sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt . +``` + +You should have the following files in `assets` before you can run +`build-wasm-simd.sh` + +``` +assets fangjun$ tree . +. +├── README.md +├── decoder_jit_trace-pnnx.ncnn.bin +├── decoder_jit_trace-pnnx.ncnn.param +├── encoder_jit_trace-pnnx.ncnn.bin +├── encoder_jit_trace-pnnx.ncnn.param +├── joiner_jit_trace-pnnx.ncnn.bin +├── joiner_jit_trace-pnnx.ncnn.param +└── tokens.txt + +0 directories, 8 files +``` diff --git a/wasm/index.html b/wasm/index.html new file mode 100644 index 00000000..dcd3154d --- /dev/null +++ b/wasm/index.html @@ -0,0 +1,40 @@ + + + + + + Next-gen Kaldi WebAssembly with sherpa-ncnn for ASR + + + + +

+ Next-gen Kaldi + WebAssembly
+ ASR Demo with sherpa-ncnn +

+
+ Loading model ... ... +
+
+ + + +
+
+ +
+ +
+
+ + + + + diff --git a/wasm/sherpa-ncnn-wasm-main.cc b/wasm/sherpa-ncnn-wasm-main.cc new file mode 100644 index 00000000..7a5e1eb6 --- /dev/null +++ b/wasm/sherpa-ncnn-wasm-main.cc @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) + * + * See LICENSE for clarification regarding multiple authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#include "sherpa-ncnn/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +static_assert(sizeof(SherpaNcnnFeatureExtractorConfig) == 4 * 2, ""); +static_assert(sizeof(SherpaNcnnModelConfig) == 4 * 9, ""); +static_assert(sizeof(SherpaNcnnDecoderConfig) == 4 * 2, ""); +static_assert(sizeof(SherpaNcnnRecognizerConfig) == + 4 * 2 + 4 * 9 + 4 * 2 + 4 * 4 + 4 * 2, + ""); + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} diff --git a/wasm/sherpa-ncnn.js b/wasm/sherpa-ncnn.js new file mode 100644 index 00000000..6f3f8b2c --- /dev/null +++ b/wasm/sherpa-ncnn.js @@ -0,0 +1,272 @@ + + +function freeConfig(config) { + if ('buffer' in config) { + _free(config.buffer); + } + _free(config.ptr); +} + +// The user should free the returned pointers +function initSherpaNcnnModelConfig(config) { + let encoderParamLen = lengthBytesUTF8(config.encoderParam) + 1; + let decoderParamLen = lengthBytesUTF8(config.decoderParam) + 1; + let joinerParamLen = lengthBytesUTF8(config.joinerParam) + 1; + + let encoderBinLen = lengthBytesUTF8(config.encoderBin) + 1; + let decoderBinLen = lengthBytesUTF8(config.decoderBin) + 1; + let joinerBinLen = lengthBytesUTF8(config.joinerBin) + 1; + + let tokensLen = lengthBytesUTF8(config.tokens) + 1; + + let n = encoderParamLen + decoderParamLen + joinerParamLen; + n += encoderBinLen + decoderBinLen + joinerBinLen; + n += tokensLen; + + let buffer = _malloc(n); + let ptr = _malloc(4 * 9); + + let offset = 0; + stringToUTF8(config.encoderParam, buffer + offset, encoderParamLen); + offset += encoderParamLen; + + stringToUTF8(config.encoderBin, buffer + offset, encoderBinLen); + offset += encoderBinLen; + + stringToUTF8(config.decoderParam, buffer + offset, decoderParamLen); + offset += decoderParamLen; + + stringToUTF8(config.decoderBin, buffer + offset, decoderBinLen); + offset += decoderBinLen; + + stringToUTF8(config.joinerParam, buffer + offset, joinerParamLen); + offset += joinerParamLen; + + stringToUTF8(config.joinerBin, buffer + offset, joinerBinLen); + offset += joinerBinLen; + + stringToUTF8(config.tokens, buffer + offset, tokensLen); + offset += tokensLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); // encoderParam + offset += encoderParamLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); // encoderBin + offset += encoderBinLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); // decoderParam + offset += decoderParamLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); // decoderBin + offset += decoderBinLen; + + Module.setValue(ptr + 16, buffer + offset, 'i8*'); // joinerParam + offset += joinerParamLen; + + Module.setValue(ptr + 20, buffer + offset, 'i8*'); // joinerBin + offset += joinerBinLen; + + Module.setValue(ptr + 24, buffer + offset, 'i8*'); // tokens + offset += tokensLen; + + Module.setValue(ptr + 28, config.useVulkanCompute, 'i32'); + Module.setValue(ptr + 32, config.numThreads, 'i32'); + + return { + buffer: buffer, ptr: ptr, len: 36, + } +} + +function initSherpaNcnnDecoderConfig(config) { + let n = lengthBytesUTF8(config.decodingMethod) + 1; + let buffer = _malloc(n); + let ptr = _malloc(4 * 2); + + stringToUTF8(config.decodingMethod, buffer, n); + + Module.setValue(ptr, buffer, 'i8*'); + Module.setValue(ptr + 4, config.numActivePaths, 'i32'); + + return { + buffer: buffer, ptr: ptr, len: 8, + } +} + +function initSherpaNcnnFeatureExtractorConfig(config) { + let ptr = _malloc(4 * 2); + Module.setValue(ptr, config.samplingRate, 'float'); + Module.setValue(ptr + 4, config.featureDim, 'i32'); + return { + ptr: ptr, len: 8, + } +} + +function initSherpaNcnnRecognizerConfig(config) { + let featConfig = initSherpaNcnnFeatureExtractorConfig(config.featConfig); + let modelConfig = initSherpaNcnnModelConfig(config.modelConfig); + let decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig); + + let numBytes = + featConfig.len + modelConfig.len + decoderConfig.len + 4 * 4 + 4 * 2; + + let ptr = _malloc(numBytes); + let offset = 0; + _CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); + offset += featConfig.len; + + _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) + offset += modelConfig.len; + + _CopyHeap(decoderConfig.ptr, decoderConfig.len, ptr + offset) + offset += decoderConfig.len; + + Module.setValue(ptr + offset, config.enableEndpoint, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.rule3MinUtternceLength, 'float'); + offset += 4; + + Module.setValue(ptr + offset, 0, 'i32'); // hotwords file + offset += 4; + + Module.setValue(ptr + offset, 0.5, 'float'); // hotwords_score + offset += 4; + + return { + ptr: ptr, len: numBytes, featConfig: featConfig, modelConfig: modelConfig, + decoderConfig: decoderConfig, + } +} + +class Stream { + constructor(handle) { + this.handle = handle; + this.pointer = null; + this.n = 0 + } + + free() { + if (this.handle) { + _DestroyStream(this.handle); + this.handle = null; + _free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + + /** + * @param sampleRate {Number} + * @param samples {Float32Array} Containing samples in the range [-1, 1] + */ + acceptWaveform(sampleRate, samples) { + if (this.n < samples.length) { + _free(this.pointer); + this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length + } + + Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + _AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length); + } + + inputFinished() { + _InputFinished(this.handle); + } +}; + +class Recognizer { + constructor(configObj, borrowedHandle) { + if (borrowedHandle) { + this.handle = borrowedHandle; + return; + } + + let config = initSherpaNcnnRecognizerConfig(configObj) + let handle = _CreateRecognizer(config.ptr); + + freeConfig(config.featConfig); + freeConfig(config.modelConfig); + freeConfig(config.decoderConfig); + freeConfig(config); + + this.handle = handle; + } + + free() { + _DestroyRecognizer(this.handle); + this.handle = 0 + } + + createStream() { + let handle = _CreateStream(this.handle); + return new Stream(handle); + } + + isReady(stream) { + return _IsReady(this.handle, stream.handle) == 1; + } + + isEndpoint(stream) { + return _IsEndpoint(this.handle, stream.handle) == 1; + } + + decode(stream) { + return _Decode(this.handle, stream.handle); + } + + reset(stream) { + _Reset(this.handle, stream.handle); + } + + getResult(stream) { + let r = _GetResult(this.handle, stream.handle); + let textPtr = getValue(r, 'i8*'); + let text = UTF8ToString(textPtr); + _DestroyResult(r); + return text; + } +} + +function createRecognizer() { + let modelConfig = { + encoderParam: './encoder_jit_trace-pnnx.ncnn.param', + encoderBin: './encoder_jit_trace-pnnx.ncnn.bin', + decoderParam: './decoder_jit_trace-pnnx.ncnn.param', + decoderBin: './decoder_jit_trace-pnnx.ncnn.bin', + joinerParam: './joiner_jit_trace-pnnx.ncnn.param', + joinerBin: './joiner_jit_trace-pnnx.ncnn.bin', + tokens: './tokens.txt', + useVulkanCompute: 0, + numThreads: 1, + }; + + let decoderConfig = { + decodingMethod: 'greedy_search', + numActivePaths: 4, + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + decoderConfig: decoderConfig, + enableEndpoint: 1, + rule1MinTrailingSilence: 1.2, + rule2MinTrailingSilence: 2.4, + rule3MinUtternceLength: 20, + }; + + return new Recognizer(configObj); +}