From 484e666ed1db7ce43382d3db454084dc157c59e7 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 16:26:58 +0800 Subject: [PATCH 01/18] add WebAssembly for Kws --- CMakeLists.txt | 5 + sherpa-onnx/c-api/c-api.cc | 157 ++++++++++ sherpa-onnx/c-api/c-api.h | 109 +++++++ .../csrc/keyword-spotter-transducer-impl.h | 15 +- wasm/CMakeLists.txt | 4 + wasm/kws/CMakeLists.txt | 55 ++++ wasm/kws/app.js | 290 ++++++++++++++++++ wasm/kws/index.html | 40 +++ wasm/kws/sherpa-onnx-kws.js | 227 ++++++++++++++ wasm/kws/sherpa-onnx-wasm-main-kws.cc | 30 ++ wasm/nodejs/CMakeLists.txt | 10 + 11 files changed, 941 insertions(+), 1 deletion(-) create mode 100644 wasm/kws/CMakeLists.txt create mode 100644 wasm/kws/app.js create mode 100644 wasm/kws/index.html create mode 100644 wasm/kws/sherpa-onnx-kws.js create mode 100644 wasm/kws/sherpa-onnx-wasm-main-kws.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index e890bfb7e..cf41dbf4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) +option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. 
Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) @@ -133,6 +134,10 @@ if(SHERPA_ONNX_ENABLE_WASM) add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) endif() +if(SHERPA_ONNX_ENABLE_WASM_KWS) + add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1) +endif() + if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") endif() diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 407b359ac..f1bcbda1f 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -17,6 +17,7 @@ #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/voice-activity-detector.h" #include "sherpa-onnx/csrc/wave-writer.h" +#include "sherpa-onnx/csrc/keyword-spotter.h" struct SherpaOnnxOnlineRecognizer { std::unique_ptr impl; @@ -648,3 +649,159 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); } + +struct SherpaOnnxOnlineKws { + std::unique_ptr impl; +}; + +// ============================================================ +// For KWS +// ============================================================ +// +SherpaOnnxOnlineKws *CreateOnlineKws( + const SherpaOnnxOnlineKwsConfig *config) { + + sherpa_onnx::KeywordSpotterConfig kws_config; + + kws_config.feat_config.sampling_rate = + SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); + + kws_config.feat_config.feature_dim = + SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); + + kws_config.model_config.transducer.encoder = + SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); + + kws_config.model_config.transducer.decoder = + SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); + + kws_config.model_config.transducer.joiner = + SHERPA_ONNX_OR(config->model_config.transducer.joiner, ""); + + kws_config.model_config.tokens = + SHERPA_ONNX_OR(config->model_config.tokens, ""); + + kws_config.model_config.num_threads = + 
SHERPA_ONNX_OR(config->model_config.num_threads, 1); + + kws_config.max_active_paths = + SHERPA_ONNX_OR(config->max_active_paths, 4); + + kws_config.num_trailing_blanks = + SHERPA_ONNX_OR(config->num_trailing_blanks, 1); + + kws_config.keywords_score = + SHERPA_ONNX_OR(config->keywords_score, 1.0); + + kws_config.keywords_threshold = + SHERPA_ONNX_OR(config->keywords_threshold, 0.25); + + kws_config.keywords_file = SHERPA_ONNX_OR(config->keywords, ""); + + SHERPA_ONNX_LOGE("%s\n", kws_config.ToString().c_str()); + + SherpaOnnxOnlineKws *kws_recognizer = new SherpaOnnxOnlineKws; + + kws_recognizer->impl = + std::make_unique(kws_config); + + return kws_recognizer; +} + +SherpaOnnxOnlineStream *CreateOnlineKwsStream( + const SherpaOnnxOnlineKws *kws_recognizer) { + SHERPA_ONNX_LOGE("c-api.cc : create stream"); + SherpaOnnxOnlineStream *stream = + new SherpaOnnxOnlineStream(kws_recognizer->impl->CreateStream()); + SHERPA_ONNX_LOGE("c-api.cc : create stream done"); + return stream; +} + +void DestroyOnlineKwsStream(SherpaOnnxOnlineStream *stream) { delete stream; } + +void DestroyOnlineKws(SherpaOnnxOnlineKws *recognizer) { + delete recognizer; +} + +int32_t IsOnlineKwsStreamReady(SherpaOnnxOnlineKws *recognizer, + SherpaOnnxOnlineStream *stream) { + return recognizer->impl->IsReady(stream->impl.get()); +} + +void DecodeOnlineKwsStream(SherpaOnnxOnlineKws *recognizer, + SherpaOnnxOnlineStream *stream) { + recognizer->impl->DecodeStream(stream->impl.get()); +} + +const SherpaOnnxOnlineKwsResult *GetOnlineKwsStreamResult( + SherpaOnnxOnlineKws *recognizer, SherpaOnnxOnlineStream *stream) { + sherpa_onnx::KeywordResult result = + recognizer->impl->GetResult(stream->impl.get()); + const auto &text = result.keyword; + + auto r = new SherpaOnnxOnlineKwsResult; + memset(r, 0, sizeof(SherpaOnnxOnlineKwsResult)); + + // copy text + r->keyword = new char[text.size() + 1]; + std::copy(text.begin(), text.end(), const_cast(r->keyword)); + const_cast(r->keyword)[text.size()] = 
0; + + // copy json + const auto &json = result.AsJsonString(); + r->json = new char[json.size() + 1]; + std::copy(json.begin(), json.end(), const_cast(r->json)); + const_cast(r->json)[json.size()] = 0; + + // copy tokens + auto count = result.tokens.size(); + if (count > 0) { + size_t total_length = 0; + for (const auto &token : result.tokens) { + // +1 for the null character at the end of each token + total_length += token.size() + 1; + } + + // Each word ends with nullptr + r->tokens = new char[total_length]; + memset(reinterpret_cast(const_cast(r->tokens)), 0, + total_length); + char **tokens_temp = new char *[count]; + int32_t pos = 0; + for (int32_t i = 0; i < count; ++i) { + tokens_temp[i] = const_cast(r->tokens) + pos; + memcpy(reinterpret_cast(const_cast(r->tokens + pos)), + result.tokens[i].c_str(), result.tokens[i].size()); + // +1 to move past the null character + pos += result.tokens[i].size() + 1; + } + r->tokens_arr = tokens_temp; + + if (!result.timestamps.empty()) { + r->timestamps = new float[count]; + std::copy(result.timestamps.begin(), result.timestamps.end(), + r->timestamps); + } else { + r->timestamps = nullptr; + } + + } else { + r->timestamps = nullptr; + r->tokens = nullptr; + r->tokens_arr = nullptr; + } + + return r; +} + +void DestroyOnlineKwsResult(const SherpaOnnxOnlineKwsResult *r) { + if (r) { + delete[] r->keyword; + delete[] r->json; + delete[] r->tokens; + delete[] r->tokens_arr; + delete[] r->timestamps; + delete r; + } +} + diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 62b2f4dcd..7cab8f1f2 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -689,6 +689,115 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename); +// ============================================================ +// For online KWS +// ============================================================ + +SHERPA_ONNX_API typedef struct 
SherpaOnnxOnlineKwsModelConfig { + SherpaOnnxOnlineTransducerModelConfig transducer; + const char *tokens; + int32_t num_threads; +} SherpaOnnxOnlineKwsModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKwsConfig { + SherpaOnnxFeatureConfig feat_config; + SherpaOnnxOnlineKwsModelConfig model_config; + + /// Used only when decoding_method is modified_beam_search + /// Example value: 4 + int32_t max_active_paths; + int32_t num_trailing_blanks; + float keywords_score; + float keywords_threshold; + /// Path to the keywords. + const char *keywords; +} SherpaOnnxOnlineKwsConfig; + + +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKwsResult { + // Recognized text + const char *keyword; + + // Pointer to continuous memory which holds string based tokens + // which are separated by \0 + const char *tokens; + + // a pointer array containing the address of the first item in tokens + const char *const *tokens_arr; + + // Pointer to continuous memory which holds timestamps + float *timestamps; + + /** Return a json string. + * + * The returned string contains: + * { + * "keyword": "The kws keyword result", + * "tokens": [x, x, x], + * "timestamps": [x, x, x], + * } + */ + const char *json; +} SherpaOnnxOnlineKwsResult; + +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKws SherpaOnnxOnlineKws; + +/// @param config Config for the kws recognizer. +/// @return Return a pointer to the kws recognizer. The user has to invoke +// DestroyOnlineKws() to free it to avoid memory leak. +SHERPA_ONNX_API SherpaOnnxOnlineKws *CreateOnlineKws( + const SherpaOnnxOnlineKwsConfig *config); + +SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineKwsStream( + const SherpaOnnxOnlineKws *kws_recognizer); + +/// Free a pointer returned by CreateOnlineKws() +/// @param recognizer A pointer returned by CreateOnlineKws() +SHERPA_ONNX_API void DestroyOnlineKws( + SherpaOnnxOnlineKws *recognizer); + +/// Destroy an online stream. 
+/// @param stream A pointer returned by CreateOnlineKwsStream() +SHERPA_ONNX_API void DestroyOnlineKwsStream(SherpaOnnxOnlineStream *stream); + +/// Get the decoding results so far for an OnlineKwsStream. +/// +/// @param recognizer A pointer returned by CreateOnlineKws(). +/// @param stream A pointer returned by CreateOnlineKwsStream(). +/// @return A pointer containing the result. The user has to invoke +/// DestroyOnlineKwsResult() to free the returned pointer to +/// avoid memory leak. +SHERPA_ONNX_API const SherpaOnnxOnlineKwsResult *GetOnlineKwsStreamResult( + SherpaOnnxOnlineKws *recognizer, SherpaOnnxOnlineStream *stream); + +/// Destroy the pointer returned by GetOnlineKwsStreamResult(). +/// +/// @param r A pointer returned by GetOnlineKwsStreamResult() +SHERPA_ONNX_API void DestroyOnlineKwsResult( + const SherpaOnnxOnlineKwsResult *r); + +/// Return 1 if there are enough feature frames for decoding. +/// Return 0 otherwise. +/// +/// @param kws_recognizer A pointer returned by CreateOnlineKws() +/// @param stream A pointer returned by CreateOnlineKwsStream() +SHERPA_ONNX_API int32_t IsOnlineKwsStreamReady( + SherpaOnnxOnlineKws *kws_recognizer, SherpaOnnxOnlineStream *stream); + + +/// Call this function to run the neural network model and decoding. +/// +/// Precondition for this function: IsOnlineKwsStreamReady() MUST return 1. 
+/// +/// Usage example: +/// +/// while (IsOnlineKwsStreamReady(recognizer, stream)) { +/// DecodeOnlineKwsStream(recognizer, stream); +/// } +/// +SHERPA_ONNX_API void DecodeOnlineKwsStream(SherpaOnnxOnlineKws *kws_recognizer, + SherpaOnnxOnlineStream *stream); + #if defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h index 50a3e252b..ee67e8976 100644 --- a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h +++ b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h @@ -98,9 +98,11 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { #endif std::unique_ptr CreateStream() const override { + SHERPA_ONNX_LOGE("test impl: create stream"); auto stream = std::make_unique(config_.feat_config, keywords_graph_); InitOnlineStream(stream.get()); + SHERPA_ONNX_LOGE("impl: create stream done"); return stream; } @@ -266,8 +268,16 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { } void InitKeywords() { - // each line in keywords_file contains space-separated words +#ifdef SHERPA_ONNX_ENABLE_WASM_KWS + // Due to the limitations of the wasm file system, + // the keyword_file variable is directly parsed as a string of keywords + // if WASM KWS on + SHERPA_ONNX_LOGE("SHERPA_ONNX_ENABLE_WASM_KWS ON : keyword is %s", config_.keywords_file.c_str()); + std::istringstream is(config_.keywords_file); + InitKeywords(is); +#else + // each line in keywords_file contains space-separated words std::ifstream is(config_.keywords_file); if (!is) { SHERPA_ONNX_LOGE("Open keywords file failed: %s", @@ -275,6 +285,9 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { exit(-1); } InitKeywords(is); +#endif + + } #if __ANDROID_API__ >= 9 diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index c5d283f19..d7c7a1a17 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -6,6 +6,10 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR) 
add_subdirectory(asr) endif() +if(SHERPA_ONNX_ENABLE_WASM_KWS) + add_subdirectory(kws) +endif() + if(SHERPA_ONNX_ENABLE_WASM_NODEJS) add_subdirectory(nodejs) endif() diff --git a/wasm/kws/CMakeLists.txt b/wasm/kws/CMakeLists.txt new file mode 100644 index 000000000..ba435f082 --- /dev/null +++ b/wasm/kws/CMakeLists.txt @@ -0,0 +1,55 @@ +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) + message(FATAL_ERROR "Please use ./build-wasm-simd-kws.sh to build for wasm KWS") +endif() + +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx") + message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx does not exist") + message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") +endif() + +set(exported_functions + AcceptWaveform + CreateOnlineKws + CreateOnlineKwsStream + GetOnlineKwsStreamResult + DecodeOnlineKwsStream + DestroyOnlineKws + DestroyOnlineKwsResult + DestroyOnlineKwsStream + IsOnlineKwsStreamReady + InputFinished +) +set(mangled_exported_functions) +foreach(x IN LISTS exported_functions) + list(APPEND mangled_exported_functions "_${x}") +endforeach() + +list(JOIN mangled_exported_functions "," all_exported_functions) + +include_directories(${CMAKE_SOURCE_DIR}) +set(MY_FLAGS "-g -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") +string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. 
") +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") +set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") + +add_executable(sherpa-onnx-wasm-kws-main sherpa-onnx-wasm-main-kws.cc) +target_link_libraries(sherpa-onnx-wasm-kws-main sherpa-onnx-c-api) +install(TARGETS sherpa-onnx-wasm-kws-main DESTINATION bin/wasm) + +install( + FILES + "sherpa-onnx-kws.js" + "app.js" + "index.html" + "$/sherpa-onnx-wasm-kws-main.js" + "$/sherpa-onnx-wasm-kws-main.wasm" + "$/sherpa-onnx-wasm-kws-main.data" + DESTINATION + bin/wasm +) \ No newline at end of file diff --git a/wasm/kws/app.js b/wasm/kws/app.js new file mode 100644 index 000000000..e823f0494 --- /dev/null +++ b/wasm/kws/app.js @@ -0,0 +1,290 @@ +// This file copies and modifies code +// from https://mdn.github.io/web-dictaphone/scripts/app.js +// and https://gist.github.com/meziantou/edb7217fddfbb70e899e + +const startBtn = document.getElementById('startBtn'); +const stopBtn = document.getElementById('stopBtn'); +const clearBtn = document.getElementById('clearBtn'); +const hint = document.getElementById('hint'); +const soundClips = document.getElementById('sound-clips'); + +let textArea = document.getElementById('results'); + +let lastResult = ''; +let resultList = []; + +clearBtn.onclick = function() { + resultList = []; + textArea.value = getDisplayResult(); + textArea.scrollTop = textArea.scrollHeight; // auto scroll +}; + +function getDisplayResult() { + let i = 0; + let ans = ''; + for (let s in resultList) { + if (resultList[s] == '') { + continue; + } + + ans += '' + i + ': ' + resultList[s] + '\n'; + i += 1; + } + + return ans; +} + + +Module = {}; +Module.onRuntimeInitialized = function() { + console.log('inited!'); + hint.innerText = 'Model loaded! 
Please click start'; + + startBtn.disabled = false; + + recognizer = createKws(Module); + console.log('recognizer is created!', recognizer); +}; + +let audioCtx; +let mediaStream; + +let expectedSampleRate = 16000; +let recordSampleRate; // the sampleRate of the microphone +let recorder = null; // the microphone +let leftchannel = []; // TODO: Use a single channel + +let recordingLength = 0; // number of samples so far + +let recognizer = null; +let recognizer_stream = null; + +if (navigator.mediaDevices.getUserMedia) { + console.log('getUserMedia supported.'); + + // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia + const constraints = {audio: true}; + + let onSuccess = function(stream) { + if (!audioCtx) { + audioCtx = new AudioContext({sampleRate: 16000}); + } + console.log(audioCtx); + recordSampleRate = audioCtx.sampleRate; + console.log('sample rate ' + recordSampleRate); + + // creates an audio node from the microphone incoming stream + mediaStream = audioCtx.createMediaStreamSource(stream); + console.log('media stream', mediaStream); + + // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor + // bufferSize: the onaudioprocess event is called when the buffer is full + var bufferSize = 4096; + var numberOfInputChannels = 1; + var numberOfOutputChannels = 2; + if (audioCtx.createScriptProcessor) { + recorder = audioCtx.createScriptProcessor( + bufferSize, numberOfInputChannels, numberOfOutputChannels); + } else { + recorder = audioCtx.createJavaScriptNode( + bufferSize, numberOfInputChannels, numberOfOutputChannels); + } + console.log('recorder', recorder); + + recorder.onaudioprocess = function(e) { + let samples = new Float32Array(e.inputBuffer.getChannelData(0)) + samples = downsampleBuffer(samples, expectedSampleRate); + + if (recognizer_stream == null) { + recognizer_stream = recognizer.createStream(); + } + + recognizer_stream.acceptWaveform(expectedSampleRate, samples); + while 
(recognizer.isReady(recognizer_stream)) { + recognizer.decode(recognizer_stream); + } + + + let result = recognizer.getResult(recognizer_stream); + console.log(result) + + if (result.keyword.length > 0) { + lastResult = result; + resultList.push(JSON.stringify(result)); + } + + + textArea.value = getDisplayResult(); + textArea.scrollTop = textArea.scrollHeight; // auto scroll + + let buf = new Int16Array(samples.length); + for (var i = 0; i < samples.length; ++i) { + let s = samples[i]; + if (s >= 1) + s = 1; + else if (s <= -1) + s = -1; + + samples[i] = s; + buf[i] = s * 32767; + } + + leftchannel.push(buf); + recordingLength += bufferSize; + }; + + startBtn.onclick = function() { + mediaStream.connect(recorder); + recorder.connect(audioCtx.destination); + + console.log('recorder started'); + + stopBtn.disabled = false; + startBtn.disabled = true; + }; + + stopBtn.onclick = function() { + console.log('recorder stopped'); + + // stopBtn recording + recorder.disconnect(audioCtx.destination); + mediaStream.disconnect(recorder); + + startBtn.style.background = ''; + startBtn.style.color = ''; + // mediaRecorder.requestData(); + + stopBtn.disabled = true; + startBtn.disabled = false; + + var clipName = new Date().toISOString(); + + const clipContainer = document.createElement('article'); + const clipLabel = document.createElement('p'); + const audio = document.createElement('audio'); + const deleteButton = document.createElement('button'); + clipContainer.classList.add('clip'); + audio.setAttribute('controls', ''); + deleteButton.textContent = 'Delete'; + deleteButton.className = 'delete'; + + clipLabel.textContent = clipName; + + clipContainer.appendChild(audio); + + clipContainer.appendChild(clipLabel); + clipContainer.appendChild(deleteButton); + soundClips.appendChild(clipContainer); + + audio.controls = true; + let samples = flatten(leftchannel); + const blob = toWav(samples); + + leftchannel = []; + const audioURL = window.URL.createObjectURL(blob); + audio.src 
= audioURL; + console.log('recorder stopped'); + + deleteButton.onclick = function(e) { + let evtTgt = e.target; + evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode); + }; + + clipLabel.onclick = function() { + const existingName = clipLabel.textContent; + const newClipName = prompt('Enter a new name for your sound clip?'); + if (newClipName === null) { + clipLabel.textContent = existingName; + } else { + clipLabel.textContent = newClipName; + } + }; + }; + }; + + let onError = function(err) { + console.log('The following error occured: ' + err); + }; + + navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError); +} else { + console.log('getUserMedia not supported on your browser!'); + alert('getUserMedia not supported on your browser!'); +} + + +// this function is copied/modified from +// https://gist.github.com/meziantou/edb7217fddfbb70e899e +function flatten(listOfSamples) { + let n = 0; + for (let i = 0; i < listOfSamples.length; ++i) { + n += listOfSamples[i].length; + } + let ans = new Int16Array(n); + + let offset = 0; + for (let i = 0; i < listOfSamples.length; ++i) { + ans.set(listOfSamples[i], offset); + offset += listOfSamples[i].length; + } + return ans; +} + +// this function is copied/modified from +// https://gist.github.com/meziantou/edb7217fddfbb70e899e +function toWav(samples) { + let buf = new ArrayBuffer(44 + samples.length * 2); + var view = new DataView(buf); + + // http://soundfile.sapp.org/doc/WaveFormat/ + // F F I R + view.setUint32(0, 0x46464952, true); // chunkID + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize + // E V A W + view.setUint32(8, 0x45564157, true); // format + // + // t m f + view.setUint32(12, 0x20746d66, true); // subchunk1ID + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM + view.setUint32(20, 1, true); // audioFormat, 1 for PCM + view.setUint16(22, 1, true); // numChannels: 1 channel + view.setUint32(24, expectedSampleRate, true); // sampleRate + view.setUint32(28, 
expectedSampleRate * 2, true); // byteRate + view.setUint16(32, 2, true); // blockAlign + view.setUint16(34, 16, true); // bitsPerSample + view.setUint32(36, 0x61746164, true); // Subchunk2ID + view.setUint32(40, samples.length * 2, true); // subchunk2Size + + let offset = 44; + for (let i = 0; i < samples.length; ++i) { + view.setInt16(offset, samples[i], true); + offset += 2; + } + + return new Blob([view], {type: 'audio/wav'}); +} + +// this function is copied from +// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46 +function downsampleBuffer(buffer, exportSampleRate) { + if (exportSampleRate === recordSampleRate) { + return buffer; + } + var sampleRateRatio = recordSampleRate / exportSampleRate; + var newLength = Math.round(buffer.length / sampleRateRatio); + var result = new Float32Array(newLength); + var offsetResult = 0; + var offsetBuffer = 0; + while (offsetResult < result.length) { + var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio); + var accum = 0, count = 0; + for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) { + accum += buffer[i]; + count++; + } + result[offsetResult] = accum / count; + offsetResult++; + offsetBuffer = nextOffsetBuffer; + } + return result; +}; \ No newline at end of file diff --git a/wasm/kws/index.html b/wasm/kws/index.html new file mode 100644 index 000000000..cd0c43f54 --- /dev/null +++ b/wasm/kws/index.html @@ -0,0 +1,40 @@ + + + + + + Next-gen Kaldi WebAssembly with sherpa-onnx for kws + + + + +

+ WebAssembly
+ Kws Demo with sherpa-onnx +

+
+ Loading model ... ... +
+
+ + + +
+
+ +
+ +
+
+ + + + + \ No newline at end of file diff --git a/wasm/kws/sherpa-onnx-kws.js b/wasm/kws/sherpa-onnx-kws.js new file mode 100644 index 000000000..ea6e6c863 --- /dev/null +++ b/wasm/kws/sherpa-onnx-kws.js @@ -0,0 +1,227 @@ + + +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + Module._free(config.ptr); +} + +// The user should free the returned pointers +function initModelConfig(config, Module) { + + let encoderBinLen = Module.lengthBytesUTF8(config.transducer.encoder) + 1; + let decoderBinLen = Module.lengthBytesUTF8(config.transducer.decoder) + 1; + let joinerBinLen = Module.lengthBytesUTF8(config.transducer.joiner) + 1; + + let tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + + let n = encoderBinLen + decoderBinLen + joinerBinLen + tokensLen; + + let buffer = Module._malloc(n); + let ptr = Module._malloc(4 * 5); + + let offset = 0; + Module.stringToUTF8(config.transducer.encoder, buffer + offset, encoderBinLen); + offset += encoderBinLen; + + Module.stringToUTF8(config.transducer.decoder, buffer + offset, decoderBinLen); + offset += encoderBinLen; + + Module.stringToUTF8(config.transducer.joiner, buffer + offset, joinerBinLen); + offset += joinerBinLen; + + Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); + offset += tokensLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); // encoderBin + offset += encoderBinLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); // decoderBin + offset += decoderBinLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); // joinerBin + offset += joinerBinLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); // tokens + offset += tokensLen; + + Module.setValue(ptr + 16, config.numThreads, 'i32'); // numThread + + return { + buffer: buffer, ptr: ptr, len: 20, + } +} + +function initFeatureExtractorConfig(config, Module) { + let ptr = Module._malloc(4 * 2); + Module.setValue(ptr, config.samplingRate, 'i32'); + 
Module.setValue(ptr + 4, config.featureDim, 'i32'); + return { + ptr: ptr, len: 8, + } +} + +function initKwsConfig(config, Module) { + let featConfig = + initFeatureExtractorConfig(config.featConfig, Module); + + let modelConfig = initModelConfig(config.modelConfig, Module); + let numBytes = + featConfig.len + modelConfig.len + 4 * 5; + + let ptr = Module._malloc(numBytes); + let offset = 0; + Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); + offset += featConfig.len; + + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) + offset += modelConfig.len; + + + Module.setValue(ptr + offset, config.maxActivePaths, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numTrailingBlanks, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsScore, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsThreshold, 'float'); + offset += 4; + + let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1; + let keywordsBuffer = Module._malloc(keywordsLen); + Module.stringToUTF8(config.keywords, keywordsBuffer, keywordsLen); + Module.setValue(ptr + offset, keywordsBuffer, 'i8*'); + offset += 4; + + return { + ptr: ptr, len: numBytes, featConfig: featConfig, modelConfig: modelConfig + } +} + +class Stream { + constructor(handle, Module) { + this.handle = handle; + this.pointer = null; + this.n = 0; + this.Module = Module; + } + + free() { + if (this.handle) { + this.Module._DestroyOnlineKwsStream(this.handle); + this.handle = null; + this.Module._free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + + /** + * @param sampleRate {Number} + * @param samples {Float32Array} Containing samples in the range [-1, 1] + */ + acceptWaveform(sampleRate, samples) { + if (this.n < samples.length) { + this.Module._free(this.pointer); + this.pointer = + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length + } + + this.Module.HEAPF32.set(samples, this.pointer / 
samples.BYTES_PER_ELEMENT); + this.Module._AcceptWaveform( + this.handle, sampleRate, this.pointer, samples.length); + } + + inputFinished() { + _InputFinished(this.handle); + } +}; + +class Kws { + constructor(configObj, Module) { + this.config = configObj; + let config = initKwsConfig(configObj, Module) + let handle = Module._CreateOnlineKws(config.ptr); + + + freeConfig(config.featConfig, Module); + freeConfig(config.modelConfig, Module); + freeConfig(config, Module); + + this.handle = handle; + this.Module = Module; + } + + free() { + this.Module._DestroyOnlineKws(this.handle); + this.handle = 0 + } + + createStream() { + let handle = this.Module._CreateOnlineKwsStream(this.handle); + return new Stream(handle, this.Module); + } + + isReady(stream) { + return this.Module._IsOnlineKwsStreamReady(this.handle, stream.handle) === 1; + } + + + decode(stream) { + return this.Module._DecodeOnlineKwsStream(this.handle, stream.handle); + } + + getResult(stream) { + let r = this.Module._GetOnlineKwsStreamResult(this.handle, stream.handle); + let jsonPtr = this.Module.getValue(r + 16, 'i8*'); + let json = this.Module.UTF8ToString(jsonPtr); + this.Module._DestroyOnlineKwsResult(r); + return JSON.parse(json); + } +} + +function createKws(Module, myConfig) { + let transducerConfig = { + encoder: './encoder-epoch-12-avg-2-chunk-16-left-64.onnx', + decoder: './decoder-epoch-12-avg-2-chunk-16-left-64.onnx', + joiner: './joiner-epoch-12-avg-2-chunk-16-left-64.onnx', + } + let modelConfig = { + transducer: transducerConfig, + tokens: './tokens.txt', + numThreads: 1 + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + maxActivePaths: 4, + numTrailingBlanks: 1, + keywordsScore: 1.0, + keywordsThreshold: 0.25, + keywords: "x iǎo s ài @小赛" + }; + + if (myConfig) { + configObj = myConfig; + } + return new Kws(configObj, Module); +} + +if (typeof process == 'object' && typeof 
process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + createKws, + }; +} \ No newline at end of file diff --git a/wasm/kws/sherpa-onnx-wasm-main-kws.cc b/wasm/kws/sherpa-onnx-wasm-main-kws.cc new file mode 100644 index 000000000..9c79d56a7 --- /dev/null +++ b/wasm/kws/sherpa-onnx-wasm-main-kws.cc @@ -0,0 +1,30 @@ +#include + +#include +#include + +#include "sherpa-onnx/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineModelConfig) == + sizeof(SherpaOnnxOnlineTransducerModelConfig) + + sizeof(SherpaOnnxOnlineParaformerModelConfig) + + sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4, + ""); +static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineKwsConfig) == + sizeof(SherpaOnnxFeatureConfig) + + sizeof(SherpaOnnxOnlineKwsModelConfig) + 5 * 4, + ""); + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} diff --git a/wasm/nodejs/CMakeLists.txt b/wasm/nodejs/CMakeLists.txt index faff50ea6..faab65660 100644 --- a/wasm/nodejs/CMakeLists.txt +++ b/wasm/nodejs/CMakeLists.txt @@ -37,6 +37,15 @@ set(exported_functions DecodeMultipleOfflineStreams GetOfflineStreamResult DestroyOfflineRecognizerResult + # online kws + CreateOnlineKws + CreateOnlineKwsStream + GetOnlineKwsStreamResult + DecodeOnlineKwsStream + DestroyOnlineKws + DestroyOnlineKwsResult + DestroyOnlineKwsStream + IsOnlineKwsStreamReady ) @@ -69,6 +78,7 @@ install( FILES ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js + 
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js "$/sherpa-onnx-wasm-nodejs.js" "$/sherpa-onnx-wasm-nodejs.wasm" DESTINATION From 22039113714e9291e66d3a30938b7e9f6babd00a Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 16:56:32 +0800 Subject: [PATCH 02/18] remove debug code --- sherpa-onnx/c-api/c-api.cc | 2 -- sherpa-onnx/csrc/keyword-spotter-transducer-impl.h | 3 --- wasm/kws/CMakeLists.txt | 2 +- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index f1bcbda1f..14ebefcda 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -710,10 +710,8 @@ SherpaOnnxOnlineKws *CreateOnlineKws( SherpaOnnxOnlineStream *CreateOnlineKwsStream( const SherpaOnnxOnlineKws *kws_recognizer) { - SHERPA_ONNX_LOGE("c-api.cc : create stream"); SherpaOnnxOnlineStream *stream = new SherpaOnnxOnlineStream(kws_recognizer->impl->CreateStream()); - SHERPA_ONNX_LOGE("c-api.cc : create stream done"); return stream; } diff --git a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h index ee67e8976..4b6852212 100644 --- a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h +++ b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h @@ -98,11 +98,9 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { #endif std::unique_ptr CreateStream() const override { - SHERPA_ONNX_LOGE("test impl: create stream"); auto stream = std::make_unique(config_.feat_config, keywords_graph_); InitOnlineStream(stream.get()); - SHERPA_ONNX_LOGE("impl: create stream done"); return stream; } @@ -273,7 +271,6 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { // Due to the limitations of the wasm file system, // the keyword_file variable is directly parsed as a string of keywords // if WASM KWS on - SHERPA_ONNX_LOGE("SHERPA_ONNX_ENABLE_WASM_KWS ON : keyword is %s", config_.keywords_file.c_str()); std::istringstream is(config_.keywords_file); 
InitKeywords(is); #else diff --git a/wasm/kws/CMakeLists.txt b/wasm/kws/CMakeLists.txt index ba435f082..01780649c 100644 --- a/wasm/kws/CMakeLists.txt +++ b/wasm/kws/CMakeLists.txt @@ -27,7 +27,7 @@ endforeach() list(JOIN mangled_exported_functions "," all_exported_functions) include_directories(${CMAKE_SOURCE_DIR}) -set(MY_FLAGS "-g -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") +set(MY_FLAGS "-s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") From ec373d1de3a6365bf26ecd9815ed29e8030eaa25 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 17:12:14 +0800 Subject: [PATCH 03/18] add readme --- wasm/kws/assets/README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 wasm/kws/assets/README.md diff --git a/wasm/kws/assets/README.md b/wasm/kws/assets/README.md new file mode 100644 index 000000000..ac67fb5a0 --- /dev/null +++ b/wasm/kws/assets/README.md @@ -0,0 +1,27 @@ +# Introduction + +Please refer to +https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/summary +to download a model. 
+ +# Kws + +The following is an example: +``` +cd sherpa-onnx/wasm/kws +git clone https://www.modelscope.cn/pkufool/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.git assets +``` + +You should have the following files in `assets` before you can run +`build-wasm-simd-kws.sh` + +``` +├── decoder-epoch-12-avg-2-chunk-16-left-64.onnx +├── encoder-epoch-12-avg-2-chunk-16-left-64.onnx +├── joiner-epoch-12-avg-2-chunk-16-left-64.onnx +├── keywords_raw.txt +├── keywords.txt +├── README.md +└── tokens.txt + +``` From 7c3ba66b9381b0effbb87fa6ad76cc0aae629dc1 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 17:15:07 +0800 Subject: [PATCH 04/18] add keywords --- wasm/kws/sherpa-onnx-kws.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wasm/kws/sherpa-onnx-kws.js b/wasm/kws/sherpa-onnx-kws.js index ea6e6c863..5d4f63147 100644 --- a/wasm/kws/sherpa-onnx-kws.js +++ b/wasm/kws/sherpa-onnx-kws.js @@ -210,7 +210,8 @@ function createKws(Module, myConfig) { numTrailingBlanks: 1, keywordsScore: 1.0, keywordsThreshold: 0.25, - keywords: "x iǎo s ài @小赛" + keywords: "x iǎo ài t óng x ué @小爱同学\n" + + "j ūn g ē n iú b ī @军哥牛逼" }; if (myConfig) { From 33559d3e1d1c83c08a213777c84b2c91e280069b Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 17:22:11 +0800 Subject: [PATCH 05/18] add build script --- build-wasm-simd-kws.sh | 56 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 build-wasm-simd-kws.sh diff --git a/build-wasm-simd-kws.sh b/build-wasm-simd-kws.sh new file mode 100644 index 000000000..f1fafd168 --- /dev/null +++ b/build-wasm-simd-kws.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +if [ x"$EMSCRIPTEN" == x"" ]; then + if ! 
command -v emcc &> /dev/null; then + echo "Please install emscripten first" + echo "" + echo "You can use the following commands to install it:" + echo "" + echo "git clone https://github.com/emscripten-core/emsdk.git" + echo "cd emsdk" + echo "git pull" + echo "./emsdk install latest" + echo "./emsdk activate latest" + echo "source ./emsdk_env.sh" + exit 1 + else + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) + fi +fi + +export EMSCRIPTEN=$EMSCRIPTEN +echo "EMSCRIPTEN: $EMSCRIPTEN" +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" + echo "Please make sure you have installed emsdk correctly" + exit 1 +fi + +mkdir -p build-wasm-simd-kws +pushd build-wasm-simd-kws + +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON + +cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ + \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DSHERPA_ONNX_ENABLE_GPU=OFF \ + -DSHERPA_ONNX_ENABLE_WASM=ON \ + -DSHERPA_ONNX_ENABLE_WASM_KWS=ON \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ + .. 
+make -j48 +make install + +ls -lh install/bin/wasm From 4cb1074d459f6045495f123718036190666b284e Mon Sep 17 00:00:00 2001 From: lovemefan Date: Fri, 8 Mar 2024 17:24:01 +0800 Subject: [PATCH 06/18] adjust thread num --- build-wasm-simd-kws.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-wasm-simd-kws.sh b/build-wasm-simd-kws.sh index f1fafd168..8310c2098 100644 --- a/build-wasm-simd-kws.sh +++ b/build-wasm-simd-kws.sh @@ -50,7 +50,7 @@ cmake \ -DSHERPA_ONNX_ENABLE_BINARY=OFF \ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ .. -make -j48 +make -j8 make install ls -lh install/bin/wasm From c02d663831b0188c6b0778e755fc9a7194ea3c2f Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sat, 9 Mar 2024 15:47:14 +0800 Subject: [PATCH 07/18] copy the code from pull request #642 --- sherpa-onnx/c-api/c-api.cc | 339 ++++++++++++++++++++----------------- sherpa-onnx/c-api/c-api.h | 228 +++++++++++++------------ 2 files changed, 302 insertions(+), 265 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 14ebefcda..98b96c04e 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -411,6 +411,189 @@ void DestroyOfflineRecognizerResult( } } +// ============================================================ +// For Keyword Spot +// ============================================================ + +struct SherpaOnnxKeywordSpotter { + std::unique_ptr impl; +}; + +SherpaOnnxKeywordSpotter* CreateKeywordSpotter( + const SherpaOnnxKeywordSpotterConfig* config) { + sherpa_onnx::KeywordSpotterConfig spotter_config; + + spotter_config.feat_config.sampling_rate = + SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); + spotter_config.feat_config.feature_dim = + SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); + + spotter_config.model_config.transducer.encoder = + SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); + spotter_config.model_config.transducer.decoder = + 
SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); + spotter_config.model_config.transducer.joiner = + SHERPA_ONNX_OR(config->model_config.transducer.joiner, ""); + + spotter_config.model_config.paraformer.encoder = + SHERPA_ONNX_OR(config->model_config.paraformer.encoder, ""); + spotter_config.model_config.paraformer.decoder = + SHERPA_ONNX_OR(config->model_config.paraformer.decoder, ""); + + spotter_config.model_config.zipformer2_ctc.model = + SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, ""); + + spotter_config.model_config.tokens = + SHERPA_ONNX_OR(config->model_config.tokens, ""); + spotter_config.model_config.num_threads = + SHERPA_ONNX_OR(config->model_config.num_threads, 1); + spotter_config.model_config.provider = + SHERPA_ONNX_OR(config->model_config.provider, "cpu"); + spotter_config.model_config.model_type = + SHERPA_ONNX_OR(config->model_config.model_type, ""); + spotter_config.model_config.debug = + SHERPA_ONNX_OR(config->model_config.debug, 0); + + spotter_config.max_active_paths = + SHERPA_ONNX_OR(config->max_active_paths, 4); + + spotter_config.num_trailing_blanks = + SHERPA_ONNX_OR(config->num_trailing_blanks , 1); + + spotter_config.keywords_score = + SHERPA_ONNX_OR(config->keywords_score, 1.0); + + spotter_config.keywords_threshold = + SHERPA_ONNX_OR(config->keywords_threshold, 0.25); + + spotter_config.keywords_file = + SHERPA_ONNX_OR(config->keywords_file, ""); + + if (config->model_config.debug) { + SHERPA_ONNX_LOGE("%s\n", spotter_config.ToString().c_str()); + } + + if (!spotter_config.Validate()) { + SHERPA_ONNX_LOGE("Errors in config!"); + return nullptr; + } + + SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter; + + spotter->impl = + std::make_unique(spotter_config); + + return spotter; +} + +void DestroyKeywordSpotter(SherpaOnnxKeywordSpotter* spotter) { + delete spotter; +} + +SherpaOnnxOnlineStream* CreateKeywordStream( + const SherpaOnnxKeywordSpotter* spotter) { + SherpaOnnxOnlineStream* stream = 
+ new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); + return stream; +} + +int32_t IsKeywordStreamReady( + SherpaOnnxKeywordSpotter* spotter, SherpaOnnxOnlineStream* stream) { + return spotter->impl->IsReady(stream->impl.get()); +} + +void DecodeKeywordStream(SherpaOnnxKeywordSpotter* spotter, + SherpaOnnxOnlineStream* stream) { + return spotter->impl->DecodeStream(stream->impl.get()); +} + +void DecodeMultipleKeywordStreams( + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream **streams, + int32_t n) { + std::vector ss(n); + for (int32_t i = 0; i != n; ++i) { + ss[i] = streams[i]->impl.get(); + } + spotter->impl->DecodeStreams(ss.data(), n); +} + +const SherpaOnnxKeywordResult *GetKeywordResult( + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream *stream) { + const sherpa_onnx::KeywordResult& result = + spotter->impl->GetResult(stream->impl.get()); + const auto &keyword = result.keyword; + + auto r = new SherpaOnnxKeywordResult; + memset(r, 0, sizeof(SherpaOnnxKeywordResult)); + + r->start_time = result.start_time; + + // copy keyword + r->keyword = new char[keyword.size() + 1]; + std::copy(keyword.begin(), keyword.end(), const_cast(r->keyword)); + const_cast(r->keyword)[keyword.size()] = 0; + + // copy json + const auto &json = result.AsJsonString(); + r->json = new char[json.size() + 1]; + std::copy(json.begin(), json.end(), const_cast(r->json)); + const_cast(r->json)[json.size()] = 0; + + // copy tokens + auto count = result.tokens.size(); + if (count > 0) { + size_t total_length = 0; + for (const auto &token : result.tokens) { + // +1 for the null character at the end of each token + total_length += token.size() + 1; + } + + r->count = count; + // Each word ends with nullptr + r->tokens = new char[total_length]; + memset(reinterpret_cast(const_cast(r->tokens)), 0, + total_length); + char **tokens_temp = new char *[r->count]; + int32_t pos = 0; + for (int32_t i = 0; i < r->count; ++i) { + tokens_temp[i] = const_cast(r->tokens) + pos; + 
memcpy(reinterpret_cast(const_cast(r->tokens + pos)), + result.tokens[i].c_str(), result.tokens[i].size()); + // +1 to move past the null character + pos += result.tokens[i].size() + 1; + } + r->tokens_arr = tokens_temp; + + if (!result.timestamps.empty()) { + r->timestamps = new float[result.timestamps.size()]; + std::copy(result.timestamps.begin(), result.timestamps.end(), + r->timestamps); + } else { + r->timestamps = nullptr; + } + + } else { + r->count = 0; + r->timestamps = nullptr; + r->tokens = nullptr; + r->tokens_arr = nullptr; + } + + return r; +} + +void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { + if (r) { + delete[] r->keyword; + delete[] r->json; + delete[] r->tokens; + delete[] r->tokens_arr; + delete[] r->timestamps; + delete r; + } +} + + // ============================================================ // For VAD // ============================================================ @@ -648,158 +831,4 @@ void SherpaOnnxDestroyOfflineTtsGeneratedAudio( int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); -} - -struct SherpaOnnxOnlineKws { - std::unique_ptr impl; -}; - -// ============================================================ -// For KWS -// ============================================================ -// -SherpaOnnxOnlineKws *CreateOnlineKws( - const SherpaOnnxOnlineKwsConfig *config) { - - sherpa_onnx::KeywordSpotterConfig kws_config; - - kws_config.feat_config.sampling_rate = - SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); - - kws_config.feat_config.feature_dim = - SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); - - kws_config.model_config.transducer.encoder = - SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); - - kws_config.model_config.transducer.decoder = - SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); - - kws_config.model_config.transducer.joiner = - 
SHERPA_ONNX_OR(config->model_config.transducer.joiner, ""); - - kws_config.model_config.tokens = - SHERPA_ONNX_OR(config->model_config.tokens, ""); - - kws_config.model_config.num_threads = - SHERPA_ONNX_OR(config->model_config.num_threads, 1); - - kws_config.max_active_paths = - SHERPA_ONNX_OR(config->max_active_paths, 4); - - kws_config.num_trailing_blanks = - SHERPA_ONNX_OR(config->num_trailing_blanks, 1); - - kws_config.num_trailing_blanks = - SHERPA_ONNX_OR(config->keywords_score, 1.0); - - kws_config.keywords_threshold = - SHERPA_ONNX_OR(config->keywords_threshold, 0.25); - - kws_config.keywords_file = SHERPA_ONNX_OR(config->keywords, ""); - - SHERPA_ONNX_LOGE("%s\n", kws_config.ToString().c_str()); - - SherpaOnnxOnlineKws *kws_recognizer = new SherpaOnnxOnlineKws; - - kws_recognizer->impl = - std::make_unique(kws_config); - - return kws_recognizer; -} - -SherpaOnnxOnlineStream *CreateOnlineKwsStream( - const SherpaOnnxOnlineKws *kws_recognizer) { - SherpaOnnxOnlineStream *stream = - new SherpaOnnxOnlineStream(kws_recognizer->impl->CreateStream()); - return stream; -} - -void DestroyOnlineKwsStream(SherpaOnnxOnlineStream *stream) { delete stream; } - -void DestroyOnlineKws(SherpaOnnxOnlineKws *recognizer) { - delete recognizer; -} - -int32_t IsOnlineKwsStreamReady(SherpaOnnxOnlineKws *recognizer, - SherpaOnnxOnlineStream *stream) { - return recognizer->impl->IsReady(stream->impl.get()); -} - -void DecodeOnlineKwsStream(SherpaOnnxOnlineKws *recognizer, - SherpaOnnxOnlineStream *stream) { - recognizer->impl->DecodeStream(stream->impl.get()); -} - -const SherpaOnnxOnlineKwsResult *GetOnlineKwsStreamResult( - SherpaOnnxOnlineKws *recognizer, SherpaOnnxOnlineStream *stream) { - sherpa_onnx::KeywordResult result = - recognizer->impl->GetResult(stream->impl.get()); - const auto &text = result.keyword; - - auto r = new SherpaOnnxOnlineKwsResult; - memset(r, 0, sizeof(SherpaOnnxOnlineKwsResult)); - - // copy text - r->keyword = new char[text.size() + 1]; - 
std::copy(text.begin(), text.end(), const_cast(r->keyword)); - const_cast(r->keyword)[text.size()] = 0; - - // copy json - const auto &json = result.AsJsonString(); - r->json = new char[json.size() + 1]; - std::copy(json.begin(), json.end(), const_cast(r->json)); - const_cast(r->json)[json.size()] = 0; - - // copy tokens - auto count = result.tokens.size(); - if (count > 0) { - size_t total_length = 0; - for (const auto &token : result.tokens) { - // +1 for the null character at the end of each token - total_length += token.size() + 1; - } - - // Each word ends with nullptr - r->tokens = new char[total_length]; - memset(reinterpret_cast(const_cast(r->tokens)), 0, - total_length); - char **tokens_temp = new char *[count]; - int32_t pos = 0; - for (int32_t i = 0; i < count; ++i) { - tokens_temp[i] = const_cast(r->tokens) + pos; - memcpy(reinterpret_cast(const_cast(r->tokens + pos)), - result.tokens[i].c_str(), result.tokens[i].size()); - // +1 to move past the null character - pos += result.tokens[i].size() + 1; - } - r->tokens_arr = tokens_temp; - - if (!result.timestamps.empty()) { - r->timestamps = new float[count]; - std::copy(result.timestamps.begin(), result.timestamps.end(), - r->timestamps); - } else { - r->timestamps = nullptr; - } - - } else { - r->timestamps = nullptr; - r->tokens = nullptr; - r->tokens_arr = nullptr; - } - - return r; -} - -void DestroyOnlineKwsResult(const SherpaOnnxOnlineKwsResult *r) { - if (r) { - delete[] r->keyword; - delete[] r->json; - delete[] r->tokens; - delete[] r->tokens_arr; - delete[] r->timestamps; - delete r; - } -} - +} \ No newline at end of file diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 7cab8f1f2..890a3538a 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -473,6 +473,123 @@ SHERPA_ONNX_API const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( SHERPA_ONNX_API void DestroyOfflineRecognizerResult( const SherpaOnnxOfflineRecognizerResult *r); +// 
============================================================ +// For Keyword Spot +// ============================================================ +SHERPA_ONNX_API typedef struct SherpaOnnxKeywordResult { + /// The triggered keyword. + /// For English, it consists of space separated words. + /// For Chinese, it consists of Chinese words without spaces. + /// Example 1: "hello world" + /// Example 2: "你好世界" + const char* keyword; + + /// Decoded results at the token level. + /// For instance, for BPE-based models it consists of a list of BPE tokens. + const char* tokens; + + const char* const* tokens_arr; + + int32_t count; + + /// timestamps.size() == tokens.size() + /// timestamps[i] records the time in seconds when tokens[i] is decoded. + float* timestamps; + + /// Starting time of this segment. + /// When an endpoint is detected, it will change + float start_time; + + /** Return a json string. + * + * The returned string contains: + * { + * "keyword": "The triggered keyword", + * "tokens": [x, x, x], + * "timestamps": [x, x, x], + * "start_time": x, + * } + */ + const char* json; +} SherpaOnnxKeywordResult; + +SHERPA_ONNX_API typedef struct SherpaOnnxKeywordSpotterConfig { + SherpaOnnxFeatureConfig feat_config; + SherpaOnnxOnlineModelConfig model_config; + int32_t max_active_paths; + int32_t num_trailing_blanks; + float keywords_score; + float keywords_threshold; + const char* keywords_file; +} SherpaOnnxKeywordSpotterConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxKeywordSpotter + SherpaOnnxKeywordSpotter; + +/// @param config Config for the keyword spotter. +/// @return Return a pointer to the spotter. The user has to invoke +/// DestroyKeywordSpotter() to free it to avoid memory leak. 
+SHERPA_ONNX_API SherpaOnnxKeywordSpotter* CreateKeywordSpotter( + const SherpaOnnxKeywordSpotterConfig* config); + +/// Free a pointer returned by CreateKeywordSpotter() +/// +/// @param p A pointer returned by CreateKeywordSpotter() +SHERPA_ONNX_API void DestroyKeywordSpotter( + SherpaOnnxKeywordSpotter* spotter); + +/// Create an online stream for accepting wave samples. +/// +/// @param spotter A pointer returned by CreateKeywordSpotter() +/// @return Return a pointer to an OnlineStream. The user has to invoke +/// DestroyOnlineStream() to free it to avoid memory leak. +SHERPA_ONNX_API SherpaOnnxOnlineStream* CreateKeywordStream( + const SherpaOnnxKeywordSpotter* spotter); + +/// Return 1 if there are enough number of feature frames for decoding. +/// Return 0 otherwise. +/// +/// @param spotter A pointer returned by CreateKeywordSpotter +/// @param stream A pointer returned by CreateKeywordStream +SHERPA_ONNX_API int32_t IsKeywordStreamReady( + SherpaOnnxKeywordSpotter* spotter, SherpaOnnxOnlineStream* stream); + +/// Call this function to run the neural network model and decoding. +// +/// Precondition for this function: IsKeywordStreamReady() MUST return 1. +SHERPA_ONNX_API void DecodeKeywordStream(SherpaOnnxKeywordSpotter* spotter, + SherpaOnnxOnlineStream* stream); + +/// This function is similar to DecodeKeywordStream(). It decodes multiple +/// OnlineStream in parallel. +/// +/// Caution: The caller has to ensure each OnlineStream is ready, i.e., +/// IsKeywordStreamReady() for that stream should return 1. +/// +/// @param spotter A pointer returned by CreateKeywordSpotter() +/// @param streams A pointer array containing pointers returned by +/// CreateKeywordStream() +/// @param n Number of elements in the given streams array. +SHERPA_ONNX_API void DecodeMultipleKeywordStreams( + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream **streams, + int32_t n); + +/// Get the decoding results so far for an OnlineStream. 
+/// +/// @param recognizer A pointer returned by CreateKeywordSpotter(). +/// @param stream A pointer returned by CreateKeywordStream(). +/// @return A pointer containing the result. The user has to invoke +/// DestroyKeywordResult() to free the returned pointer to +/// avoid memory leak. +SHERPA_ONNX_API const SherpaOnnxKeywordResult *GetKeywordResult( + SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream *stream); + +/// Destroy the pointer returned by GetKeywordResult(). +/// +/// @param r A pointer returned by GetKeywordResult() +SHERPA_ONNX_API void DestroyKeywordResult( + const SherpaOnnxKeywordResult *r); + // ============================================================ // For VAD // ============================================================ @@ -689,115 +806,6 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename); -// ============================================================ -// For online KWS -// ============================================================ - -SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKwsModelConfig { - SherpaOnnxOnlineTransducerModelConfig transducer; - const char *tokens; - int32_t num_threads; -} SherpaOnnxOnlineKwsModelConfig; - -SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKwsConfig { - SherpaOnnxFeatureConfig feat_config; - SherpaOnnxOnlineKwsModelConfig model_config; - - /// Used only when decoding_method is modified_beam_search - /// Example value: 4 - int32_t max_active_paths; - int32_t num_trailing_blanks; - float keywords_score; - float keywords_threshold; - /// Path to the keywords. 
- const char *keywords; -} SherpaOnnxOnlineKwsConfig; - - -SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKwsResult { - // Recognized text - const char *keyword; - - // Pointer to continuous memory which holds string based tokens - // which are separated by \0 - const char *tokens; - - // a pointer array containing the address of the first item in tokens - const char *const *tokens_arr; - - // Pointer to continuous memory which holds timestamps - float *timestamps; - - /** Return a json string. - * - * The returned string contains: - * { - * "keyword": "The kws keyword result", - * "tokens": [x, x, x], - * "timestamps": [x, x, x], - * } - */ - const char *json; -} SherpaOnnxOnlineKwsResult; - -SHERPA_ONNX_API typedef struct SherpaOnnxOnlineKws SherpaOnnxOnlineKws; - -/// @param config Config for the kws recognizer. -/// @return Return a pointer to the kws recognizer. The user has to invoke -// DestroyOnlineKws() to free it to avoid memory leak. -SHERPA_ONNX_API SherpaOnnxOnlineKws *CreateOnlineKws( - const SherpaOnnxOnlineKwsConfig *config); - -SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineKwsStream( - const SherpaOnnxOnlineKws *kws_recognizer); - -/// Free a pointer returned by CreateOnlineKws() -/// @param recognizer A pointer returned by CreateOnlineKws() -SHERPA_ONNX_API void DestroyOnlineKws( - SherpaOnnxOnlineKws *recognizer); - -/// Destroy an online stream. -/// @param stream A pointer returned by CreateOnlineStream() -SHERPA_ONNX_API void DestroyOnlineKwsStream(SherpaOnnxOnlineStream *stream); - -/// Get the decoding results so far for an OnlineKwsStream. -/// -/// @param recognizer A pointer returned by CreateOnlineKws(). -/// @param stream A pointer returned by CreateOnlineKwsStream(). -/// @return A pointer containing the result. The user has to invoke -/// DestroyOnlineKwsResult() to free the returned pointer to -/// avoid memory leak. 
-SHERPA_ONNX_API const SherpaOnnxOnlineKwsResult *GetOnlineKwsStreamResult( - SherpaOnnxOnlineKws *recognizer, SherpaOnnxOnlineStream *stream); - -/// Destroy the pointer returned by GetOnlineKwsStreamResult(). -/// -/// @param r A pointer returned by GetOnlineKwsStreamResult() -SHERPA_ONNX_API void DestroyOnlineKwsResult( - const SherpaOnnxOnlineKwsResult *r); - -/// Return 1 if there are enough number of feature frames for decoding. -/// Return 0 otherwise. -/// -/// @param kws_recognizer A pointer returned by CreateOnlineKws -/// @param stream A pointer returned by CreateOnlineKwsStream -SHERPA_ONNX_API int32_t IsOnlineKwsStreamReady( - SherpaOnnxOnlineKws *kws_recognizer, SherpaOnnxOnlineStream *stream); - - -/// Call this function to run the neural network model and decoding. -// -/// Precondition for this function: IsOnlineStreamReady() MUST return 1. -/// -/// Usage example: -/// -/// while (IsOnlineKwsStreamReady(recognizer, stream)) { -/// DecodeOnlineKwsStream(recognizer, stream); -/// } -/// -SHERPA_ONNX_API void DecodeOnlineKwsStream(SherpaOnnxOnlineKws *kws_recognizer, - SherpaOnnxOnlineStream *stream); - #if defined(__GNUC__) #pragma GCC diagnostic pop #endif @@ -806,4 +814,4 @@ SHERPA_ONNX_API void DecodeOnlineKwsStream(SherpaOnnxOnlineKws *kws_recognizer, } /* extern "C" */ #endif -#endif // SHERPA_ONNX_C_API_C_API_H_ +#endif // SHERPA_ONNX_C_API_C_API_H_ \ No newline at end of file From 3afb57f535c334faa50c95e4324d3141d2143830 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sat, 9 Mar 2024 15:47:52 +0800 Subject: [PATCH 08/18] update the error message --- wasm/kws/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/kws/CMakeLists.txt b/wasm/kws/CMakeLists.txt index 01780649c..3baaa630b 100644 --- a/wasm/kws/CMakeLists.txt +++ b/wasm/kws/CMakeLists.txt @@ -1,5 +1,5 @@ if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) - message(FATAL_ERROR "Please use ./build-wasm-kws.sh to build for wasm KWS") + message(FATAL_ERROR 
"Please use ./build-wasm-simd-kws.sh to build for wasm KWS") endif() if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx") From 270bbae2562913ec0b2ace6896d9717fd20454a3 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 21:33:56 +0800 Subject: [PATCH 09/18] adapt to c api of pull request #642 --- wasm/kws/CMakeLists.txt | 15 ++-- wasm/kws/sherpa-onnx-kws.js | 114 ++++++++++++++++++-------- wasm/kws/sherpa-onnx-wasm-main-kws.cc | 4 +- 3 files changed, 87 insertions(+), 46 deletions(-) diff --git a/wasm/kws/CMakeLists.txt b/wasm/kws/CMakeLists.txt index 3baaa630b..f083892cc 100644 --- a/wasm/kws/CMakeLists.txt +++ b/wasm/kws/CMakeLists.txt @@ -9,14 +9,13 @@ endif() set(exported_functions AcceptWaveform - CreateOnlineKws - CreateOnlineKwsStream - GetOnlineKwsStreamResult - DecodeOnlineKwsStream - DestroyOnlineKws - DestroyOnlineKwsResult - DestroyOnlineKwsStream - IsOnlineKwsStreamReady + CreateKeywordSpotter + DestroyKeywordSpotter + CreateKeywordStream + DecodeKeywordStream + GetKeywordResult + DestroyKeywordResult + IsKeywordStreamReady InputFinished ) set(mangled_exported_functions) diff --git a/wasm/kws/sherpa-onnx-kws.js b/wasm/kws/sherpa-onnx-kws.js index 5d4f63147..7d91745e3 100644 --- a/wasm/kws/sherpa-onnx-kws.js +++ b/wasm/kws/sherpa-onnx-kws.js @@ -7,50 +7,89 @@ function freeConfig(config, Module) { Module._free(config.ptr); } -// The user should free the returned pointers -function initModelConfig(config, Module) { - let encoderBinLen = Module.lengthBytesUTF8(config.transducer.encoder) + 1; - let decoderBinLen = Module.lengthBytesUTF8(config.transducer.decoder) + 1; - let joinerBinLen = Module.lengthBytesUTF8(config.transducer.joiner) + 1; +function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; + const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; - 
let tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + const n = encoderLen + decoderLen + joinerLen; - let n = encoderBinLen + decoderBinLen + joinerBinLen + tokensLen; + const buffer = Module._malloc(n); - let buffer = Module._malloc(n); - let ptr = Module._malloc(4 * 5); + const len = 3 * 4; // 3 pointers + const ptr = Module._malloc(len); let offset = 0; - Module.stringToUTF8(config.transducer.encoder, buffer + offset, encoderBinLen); - offset += encoderBinLen; + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); + offset += encoderLen; - Module.stringToUTF8(config.transducer.decoder, buffer + offset, decoderBinLen); - offset += encoderBinLen; + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); + offset += decoderLen; - Module.stringToUTF8(config.transducer.joiner, buffer + offset, joinerBinLen); - offset += joinerBinLen; - - Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); - offset += tokensLen; + Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); offset = 0; - Module.setValue(ptr, buffer + offset, 'i8*'); // encoderBin - offset += encoderBinLen; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += encoderLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += decoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + + return { + buffer: buffer, ptr: ptr, len: len, + } +} - Module.setValue(ptr + 4, buffer + offset, 'i8*'); // decoderBin - offset += decoderBinLen; +// The user should free the returned pointers +function initModelConfig(config, Module) { + const transducer = + initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); + const paraformer_len = 2 * 4 + const ctc_len = 1 * 4 + + const len = transducer.len + paraformer_len + ctc_len + 5 * 4; + const ptr = Module._malloc(len); - Module.setValue(ptr + 8, buffer + offset, 'i8*'); // joinerBin - offset += joinerBinLen; + let offset = 0; + Module._CopyHeap(transducer.ptr, transducer.len, ptr + 
offset); - Module.setValue(ptr + 12, buffer + offset, 'i8*'); // tokens + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + const providerLen = Module.lengthBytesUTF8(config.provider) + 1; + const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1; + const bufferLen = tokensLen + providerLen + modelTypeLen; + const buffer = Module._malloc(bufferLen); + + offset = 0; + Module.stringToUTF8(config.tokens, buffer, tokensLen); offset += tokensLen; - Module.setValue(ptr + 16, config.numThreads, 'i32'); // numThread + Module.stringToUTF8(config.provider, buffer + offset, providerLen); + offset += providerLen; + + Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen); + + offset = transducer.len + paraformer_len + ctc_len; + Module.setValue(ptr + offset, buffer, 'i8*'); // tokens + offset += 4; + + Module.setValue(ptr + offset, config.numThreads, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider + offset += 4; + + Module.setValue(ptr + offset, config.debug, 'i32'); + offset += 4; + + Module.setValue( + ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType + offset += 4; return { - buffer: buffer, ptr: ptr, len: 20, + buffer: buffer, ptr: ptr, len: len, } } @@ -147,7 +186,7 @@ class Kws { constructor(configObj, Module) { this.config = configObj; let config = initKwsConfig(configObj, Module) - let handle = Module._CreateOnlineKws(config.ptr); + let handle = Module._CreateKeywordSpotter(config.ptr); freeConfig(config.featConfig, Module); @@ -159,29 +198,29 @@ class Kws { } free() { - this.Module._DestroyOnlineKws(this.handle); + this.Module._DestroyKeywordSpotter(this.handle); this.handle = 0 } createStream() { - let handle = this.Module._CreateOnlineKwsStream(this.handle); + let handle = this.Module._CreateKeywordStream(this.handle); return new Stream(handle, this.Module); } isReady(stream) { - return this.Module._IsOnlineKwsStreamReady(this.handle, stream.handle) === 1; + 
return this.Module._IsKeywordStreamReady(this.handle, stream.handle) === 1; } decode(stream) { - return this.Module._DecodeOnlineKwsStream(this.handle, stream.handle); + return this.Module._DecodeKeywordStream(this.handle, stream.handle); } getResult(stream) { - let r = this.Module._GetOnlineKwsStreamResult(this.handle, stream.handle); - let jsonPtr = this.Module.getValue(r + 16, 'i8*'); + let r = this.Module._GetKeywordResult(this.handle, stream.handle); + let jsonPtr = this.Module.getValue(r + 24, 'i8*'); let json = this.Module.UTF8ToString(jsonPtr); - this.Module._DestroyOnlineKwsResult(r); + this.Module._DestroyKeywordResult(r); return JSON.parse(json); } } @@ -195,7 +234,10 @@ function createKws(Module, myConfig) { let modelConfig = { transducer: transducerConfig, tokens: './tokens.txt', - numThreads: 1 + provider: 'cpu', + modelType: "", + numThreads: 1, + debug: 1 }; let featConfig = { diff --git a/wasm/kws/sherpa-onnx-wasm-main-kws.cc b/wasm/kws/sherpa-onnx-wasm-main-kws.cc index 9c79d56a7..0ddbdae30 100644 --- a/wasm/kws/sherpa-onnx-wasm-main-kws.cc +++ b/wasm/kws/sherpa-onnx-wasm-main-kws.cc @@ -19,9 +19,9 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4, ""); static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineKwsConfig) == +static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == sizeof(SherpaOnnxFeatureConfig) + - sizeof(SherpaOnnxOnlineKwsModelConfig) + 5 * 4, + sizeof(SherpaOnnxOnlineModelConfig) + 5 * 4, ""); void CopyHeap(const char *src, int32_t num_bytes, char *dst) { From 47d84f5bfafb36b48eb72b4bb748b0d67548931d Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 21:37:12 +0800 Subject: [PATCH 10/18] Enable keyword modification without recompilation in the solution --- sherpa-onnx/c-api/c-api.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 
98b96c04e..1c2c6e295 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -473,10 +473,16 @@ SherpaOnnxKeywordSpotter* CreateKeywordSpotter( SHERPA_ONNX_LOGE("%s\n", spotter_config.ToString().c_str()); } +#ifndef SHERPA_ONNX_ENABLE_WASM_KWS + // due to the limitations of the wasm file system, + // keywords file will be packaged into the sherpa-onnx-wasm-kws-main.data file + // Solution: take keyword_file variable is directly + // parsed as a string of keywords if (!spotter_config.Validate()) { SHERPA_ONNX_LOGE("Errors in config!"); return nullptr; } +#endif SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter; From 8623a4d3c9c36f47fe154aec2d5de2460f62ff0b Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 21:56:11 +0800 Subject: [PATCH 11/18] format code style --- sherpa-onnx/c-api/c-api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 1c2c6e295..6043245ec 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -837,4 +837,4 @@ void SherpaOnnxDestroyOfflineTtsGeneratedAudio( int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); -} \ No newline at end of file +} From 6cb41efe6a3d92b0e3c2fa83b20449946901f068 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 21:56:25 +0800 Subject: [PATCH 12/18] update c api --- wasm/nodejs/CMakeLists.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/wasm/nodejs/CMakeLists.txt b/wasm/nodejs/CMakeLists.txt index faab65660..f90387e9b 100644 --- a/wasm/nodejs/CMakeLists.txt +++ b/wasm/nodejs/CMakeLists.txt @@ -38,14 +38,13 @@ set(exported_functions GetOfflineStreamResult DestroyOfflineRecognizerResult # online kws - CreateOnlineKws - CreateOnlineKwsStream - GetOnlineKwsStreamResult - DecodeOnlineKwsStream - DestroyOnlineKws - 
DestroyOnlineKwsResult - DestroyOnlineKwsStream - IsOnlineKwsStreamReady + CreateKeywordSpotter + DestroyKeywordSpotter + CreateKeywordStream + DecodeKeywordStream + GetKeywordResult + DestroyKeywordResult + IsKeywordStreamReady ) From 4e37fdd7b50b98cec4910005f62177d3bff30e2d Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 22:01:29 +0800 Subject: [PATCH 13/18] code style format: add a newline character at the end of the file --- sherpa-onnx/c-api/c-api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 890a3538a..a6a7389c2 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -814,4 +814,4 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, } /* extern "C" */ #endif -#endif // SHERPA_ONNX_C_API_C_API_H_ \ No newline at end of file +#endif // SHERPA_ONNX_C_API_C_API_H_ From be354fc5bade63e8d6df6ffb3fe4ee39cfe4e682 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Sun, 10 Mar 2024 22:16:33 +0800 Subject: [PATCH 14/18] code style format: delete empty line --- sherpa-onnx/csrc/keyword-spotter-transducer-impl.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h index 4b6852212..ef22a9984 100644 --- a/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h +++ b/sherpa-onnx/csrc/keyword-spotter-transducer-impl.h @@ -266,7 +266,6 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { } void InitKeywords() { - #ifdef SHERPA_ONNX_ENABLE_WASM_KWS // Due to the limitations of the wasm file system, // the keyword_file variable is directly parsed as a string of keywords @@ -283,8 +282,6 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl { } InitKeywords(is); #endif - - } #if __ANDROID_API__ >= 9 From a026fba750c58b0db73f9a0c664f7dd7dfa70b61 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Mon, 11 Mar 2024 09:50:40 
+0800 Subject: [PATCH 15/18] add copyright information --- wasm/kws/sherpa-onnx-wasm-main-kws.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wasm/kws/sherpa-onnx-wasm-main-kws.cc b/wasm/kws/sherpa-onnx-wasm-main-kws.cc index 0ddbdae30..832e525d9 100644 --- a/wasm/kws/sherpa-onnx-wasm-main-kws.cc +++ b/wasm/kws/sherpa-onnx-wasm-main-kws.cc @@ -1,3 +1,6 @@ +// wasm/sherpa-onnx-wasm-main-kws.cc +// +// Copyright (c) 2024 lovemefan #include #include From 7b7ee1711118b5f5797cefa067da7a64f7f456df Mon Sep 17 00:00:00 2001 From: lovemefan Date: Mon, 11 Mar 2024 11:15:10 +0800 Subject: [PATCH 16/18] merged latest code --- .github/workflows/build-wheels-aarch64.yaml | 9 +- .github/workflows/build-wheels-linux.yaml | 14 +- .github/workflows/linux.yaml | 2 +- .github/workflows/riscv64-linux.yaml | 83 ++++-- CMakeLists.txt | 3 + MANIFEST.in | 12 + .../java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt | 3 + build-ios.sh | 2 +- build-riscv64-linux-gnu.sh | 5 +- c-api-examples/CMakeLists.txt | 6 + .../asr-microphone-example/CMakeLists.txt | 9 + .../asr-microphone-example/CPPLINT.cfg | 1 + .../asr-microphone-example/README.md | 12 + c-api-examples/asr-microphone-example/alsa.cc | 1 + c-api-examples/asr-microphone-example/alsa.h | 1 + .../asr-microphone-example/c-api-alsa.cc | 254 ++++++++++++++++++ c-api-examples/decode-file-c-api.c | 2 +- cmake/onnxruntime-linux-aarch64.cmake | 16 +- cmake/onnxruntime-linux-riscv64.cmake | 18 +- cmake/onnxruntime-linux-x86_64.cmake | 16 +- cmake/onnxruntime.cmake | 100 +++---- .../SherpaOnnx.xcodeproj/project.pbxproj | 2 +- .../SherpaOnnx2Pass.xcodeproj/project.pbxproj | 2 +- .../project.pbxproj | 2 +- .../SherpaOnnxTts.xcodeproj/project.pbxproj | 2 +- ...microphone-with-endpoint-detection-alsa.py | 207 ++++++++++++++ ...transducer-modified-beam-search-decoder.cc | 2 +- sherpa-onnx/csrc/session.cc | 2 +- ...nnx-alsa-offline-speaker-identification.cc | 4 +- sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc | 4 +- 
.../csrc/sherpa-onnx-keyword-spotter-alsa.cc | 4 - sherpa-onnx/jni/jni.cc | 16 ++ sherpa-onnx/python/csrc/CMakeLists.txt | 17 +- sherpa-onnx/python/csrc/alsa.cc | 30 +++ sherpa-onnx/python/csrc/alsa.h | 16 ++ sherpa-onnx/python/csrc/faked-alsa.cc | 45 ++++ sherpa-onnx/python/csrc/sherpa-onnx.cc | 3 + sherpa-onnx/python/sherpa_onnx/__init__.py | 1 + 38 files changed, 801 insertions(+), 127 deletions(-) create mode 100644 MANIFEST.in create mode 100644 c-api-examples/asr-microphone-example/CMakeLists.txt create mode 100644 c-api-examples/asr-microphone-example/CPPLINT.cfg create mode 100644 c-api-examples/asr-microphone-example/README.md create mode 120000 c-api-examples/asr-microphone-example/alsa.cc create mode 120000 c-api-examples/asr-microphone-example/alsa.h create mode 100644 c-api-examples/asr-microphone-example/c-api-alsa.cc create mode 100755 python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py create mode 100644 sherpa-onnx/python/csrc/alsa.cc create mode 100644 sherpa-onnx/python/csrc/alsa.h create mode 100644 sherpa-onnx/python/csrc/faked-alsa.cc diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml index cbd3a4225..6834de8c3 100644 --- a/.github/workflows/build-wheels-aarch64.yaml +++ b/.github/workflows/build-wheels-aarch64.yaml @@ -17,13 +17,14 @@ concurrency: jobs: build_wheels_aarch64: - name: ${{ matrix.python-version }} + name: ${{ matrix.manylinux }} ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + manylinux: [manylinux2014, manylinux_2_28] steps: - uses: actions/checkout@v4 @@ -51,7 +52,7 @@ jobs: CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" CIBW_BUILD_VERBOSITY: 3 CIBW_ARCHS_LINUX: aarch64 - CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/manylinux_2_28_aarch64 + CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ 
matrix.manylinux }}_aarch64 # From onnxruntime >= 1.17.0, it drops support for CentOS 7.0 and it supports only manylinux_2_28. # manylinux_2_24 is no longer supported @@ -63,7 +64,7 @@ jobs: ls -lh ./wheelhouse/*.whl - name: Publish to huggingface - if: matrix.python-version == 'cp38' + if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux_2_28' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} uses: nick-fields/retry@v3 @@ -93,7 +94,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: wheel-${{ matrix.python-version }} + name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }} path: ./wheelhouse/*.whl - name: Publish wheels to PyPI diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml index 329beb37f..50470dcfc 100644 --- a/.github/workflows/build-wheels-linux.yaml +++ b/.github/workflows/build-wheels-linux.yaml @@ -17,13 +17,15 @@ concurrency: jobs: build_wheels_linux: - name: ${{ matrix.python-version }} + name: ${{ matrix.manylinux }} ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + manylinux: [manylinux2014, manylinux_2_28] + steps: - uses: actions/checkout@v4 @@ -46,9 +48,7 @@ jobs: CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" CIBW_BUILD_VERBOSITY: 3 - CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/manylinux_2_28_x86_64 - # From onnxruntime >= 1.17.0, it drops support for CentOS 7.0 and it supports only manylinux_2_28. 
- # manylinux_2_24 is no longer supported + CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_x86_64 - name: Display wheels shell: bash @@ -76,7 +76,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: wheel-${{ matrix.python-version }} + name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }} path: ./wheelhouse/*.whl - name: Publish to huggingface @@ -119,14 +119,14 @@ jobs: twine upload ./wheelhouse/*.whl - name: Build sdist - if: matrix.python-version == 'cp38' + if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux_2_28' shell: bash run: | python3 setup.py sdist ls -l dist/* - name: Publish sdist to PyPI - if: matrix.python-version == 'cp38' + if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux_2_28' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 1f911e034..13374610e 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -227,7 +227,7 @@ jobs: tar cjvf ${dst}.tar.bz2 $dst - name: Release pre-compiled binaries and libs for linux x64 - if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.os == 'ubuntu-20.04' && matrix.gcc_version == '7' uses: svenstaro/upload-release-action@v2 with: file_glob: true diff --git a/.github/workflows/riscv64-linux.yaml b/.github/workflows/riscv64-linux.yaml index 04d196a8f..1a2f53993 100644 --- a/.github/workflows/riscv64-linux.yaml +++ b/.github/workflows/riscv64-linux.yaml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - lib_type: [static, shared] + lib_type: [shared] #, static] steps: - uses: actions/checkout@v4 @@ -55,45 +55,35 @@ jobs: 
uses: actions/cache@v4 with: path: qemu-install - key: qemu-riscv-install-20240225 - - - name: install-qemu-build-deps - if: steps.cache-qemu.outputs.cache-hit != 'true' - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build - - - name: checkout-qemu - if: steps.cache-qemu.outputs.cache-hit != 'true' - uses: actions/checkout@v3 - with: - repository: qemu/qemu - path: qemu + key: qemu-riscv-xuantie-install-20240306 - name: qemu if: steps.cache-qemu.outputs.cache-hit != 'true' run: | - cd qemu - ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system - make -j2 - make install - ls -lh $GITHUB_WORKSPACE/qemu-install - ls -lh $GITHUB_WORKSPACE/qemu-install/bin + # https://pypi.org/project/xuantie-qemu/#files + wget -q https://files.pythonhosted.org/packages/21/f4/733f29c435987e8bb264a6504c7a4ea4c04d0d431b38a818ab63eef082b9/xuantie_qemu-20230825-py3-none-manylinux1_x86_64.whl + unzip xuantie_qemu-20230825-py3-none-manylinux1_x86_64.whl + mkdir -p qemu-install/bin + + cp -v ./qemu/qemu-riscv64 ./qemu-install/bin - name: cache-toolchain id: cache-toolchain uses: actions/cache@v4 with: path: toolchain - key: riscv64-glibc-ubuntu-20.04-gcc-nightly-2023.10.17-nightly + key: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz - name: Download toolchain if: steps.cache-toolchain.outputs.cache-hit != 'true' shell: bash run: | + wget -q https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz + mkdir $GITHUB_WORKSPACE/toolchain - wget -q https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2023.10.17/riscv64-glibc-ubuntu-20.04-gcc-nightly-2023.10.17-nightly.tar.gz - tar xvf ./riscv64-glibc-ubuntu-20.04-gcc-nightly-2023.10.17-nightly.tar.gz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain + + tar xvf ./Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz 
--strip-components 1 -C $GITHUB_WORKSPACE/toolchain + ls -lh $GITHUB_WORKSPACE/toolchain/bin - name: Display toolchain info shell: bash @@ -139,6 +129,7 @@ jobs: export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib ls -lh ./build-riscv64-linux-gnu/bin @@ -154,6 +145,44 @@ jobs: qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts --help readelf -d ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts + - name: Test streaming speech recognition + shell: bash + run: | + export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib + + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2 + rm sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2 + + qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx \ + --tokens=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/tokens.txt \ + --encoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/encoder-epoch-99-avg-1.onnx \ + --decoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/decoder-epoch-99-avg-1.onnx \ + --joiner=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/joiner-epoch-99-avg-1.onnx \ + ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/test_wavs/0.wav + + - name: Test offline tts + shell: bash + run: | + export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH + export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH + export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib + + wget -q 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2 + tar xf vits-piper-en_US-lessac-medium.tar.bz2 + rm vits-piper-en_US-lessac-medium.tar.bz2 + + qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts \ + --vits-model=./vits-piper-en_US-lessac-medium/en_US-lessac-medium.onnx \ + --vits-data-dir=./vits-piper-en_US-lessac-medium/espeak-ng-data \ + --vits-tokens=./vits-piper-en_US-lessac-medium/tokens.txt \ + --output-filename=./liliana-piper-en_US-lessac-medium.wav \ + 'liliana, the most beautiful and lovely assistant of our team!' + - name: Copy files shell: bash run: | @@ -190,6 +219,12 @@ jobs: name: sherpa-onnx-linux-riscv64-shared path: sherpa-onnx-*linux-riscv64-shared.tar.bz2 + - uses: actions/upload-artifact@v4 + if: matrix.lib_type == 'shared' + with: + name: wave + path: ./*.wav + - uses: actions/upload-artifact@v4 if: matrix.lib_type == 'static' with: diff --git a/CMakeLists.txt b/CMakeLists.txt index cf41dbf4f..6af2fa9a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. 
Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) +option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-installed onnxruntime if available" ON) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") @@ -111,6 +112,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") +message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE}") if(SHERPA_ONNX_ENABLE_WASM_TTS) if(NOT SHERPA_ONNX_ENABLE_WASM) @@ -149,6 +151,7 @@ include(CheckIncludeFileCXX) if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android) check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) if(SHERPA_ONNX_HAS_ALSA) + message(STATUS "With Alsa") add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1) else() message(WARNING "\ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..4372d1963 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,12 @@ +include LICENSE +include README.md +include CMakeLists.txt +recursive-include c-api-examples *.* +recursive-include sherpa-onnx *.* +recursive-include cmake *.* +prune */__pycache__ +prune android +prune sherpa-onnx/java-api +prune ios-swift +prune ios-swiftui + diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt index 228f19c1f..601ecf83f 100644 --- a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt @@ -75,6 +75,9 @@ data class OfflineParaformerModelConfig( data class 
OfflineWhisperModelConfig( var encoder: String = "", var decoder: String = "", + var language: String = "en", // Used with multilingual model + var task: String = "transcribe", // transcribe or translate + var tailPaddings: Int = 1000, // Padding added at the end of the samples ) data class OfflineModelConfig( diff --git a/build-ios.sh b/build-ios.sh index 5517a8b44..599a1725f 100755 --- a/build-ios.sh +++ b/build-ios.sh @@ -5,7 +5,7 @@ set -e dir=build-ios mkdir -p $dir cd $dir -onnxruntime_version=1.16.3 +onnxruntime_version=1.17.1 onnxruntime_dir=ios-onnxruntime/$onnxruntime_version if [ ! -f $onnxruntime_dir/onnxruntime.xcframework/ios-arm64/onnxruntime.a ]; then diff --git a/build-riscv64-linux-gnu.sh b/build-riscv64-linux-gnu.sh index 96d65b446..16d9c0d6a 100755 --- a/build-riscv64-linux-gnu.sh +++ b/build-riscv64-linux-gnu.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +set -ex if ! command -v riscv64-unknown-linux-gnu-g++ &> /dev/null; then echo "Please install the toolchain first." @@ -42,8 +43,8 @@ export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then - # By default, use static link - BUILD_SHARED_LIBS=OFF + # By default, use shared libraries + BUILD_SHARED_LIBS=ON fi cmake \ diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt index 95983cd86..478dd8ee8 100644 --- a/c-api-examples/CMakeLists.txt +++ b/c-api-examples/CMakeLists.txt @@ -6,3 +6,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) add_executable(offline-tts-c-api offline-tts-c-api.c) target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) + +if(SHERPA_ONNX_HAS_ALSA) + add_subdirectory(./asr-microphone-example) +else() + message(WARNING "Not include ./asr-microphone-example since alsa is not available") +endif() diff --git a/c-api-examples/asr-microphone-example/CMakeLists.txt b/c-api-examples/asr-microphone-example/CMakeLists.txt new file 
mode 100644 index 000000000..1c486bb3b --- /dev/null +++ b/c-api-examples/asr-microphone-example/CMakeLists.txt @@ -0,0 +1,9 @@ + +add_executable(c-api-alsa c-api-alsa.cc alsa.cc) +target_link_libraries(c-api-alsa sherpa-onnx-c-api cargs) + +if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR}) + target_link_libraries(c-api-alsa -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound) +else() + target_link_libraries(c-api-alsa asound) +endif() diff --git a/c-api-examples/asr-microphone-example/CPPLINT.cfg b/c-api-examples/asr-microphone-example/CPPLINT.cfg new file mode 100644 index 000000000..f1b97ab71 --- /dev/null +++ b/c-api-examples/asr-microphone-example/CPPLINT.cfg @@ -0,0 +1 @@ +exclude_files=alsa.cc|alsa.h diff --git a/c-api-examples/asr-microphone-example/README.md b/c-api-examples/asr-microphone-example/README.md new file mode 100644 index 000000000..50e242352 --- /dev/null +++ b/c-api-examples/asr-microphone-example/README.md @@ -0,0 +1,12 @@ +# Introduction + +This folder contains examples for real-time speech recognition from a microphone +using sherpa-onnx C API. + +**Note**: You can call C API from C++ files. + + +## ./c-api-alsa.cc + +This file uses alsa to read a microphone. It runs only on Linux. This file +does not support macOS or Windows. 
diff --git a/c-api-examples/asr-microphone-example/alsa.cc b/c-api-examples/asr-microphone-example/alsa.cc new file mode 120000 index 000000000..7acd97ce4 --- /dev/null +++ b/c-api-examples/asr-microphone-example/alsa.cc @@ -0,0 +1 @@ +../../sherpa-onnx/csrc/alsa.cc \ No newline at end of file diff --git a/c-api-examples/asr-microphone-example/alsa.h b/c-api-examples/asr-microphone-example/alsa.h new file mode 120000 index 000000000..cde299589 --- /dev/null +++ b/c-api-examples/asr-microphone-example/alsa.h @@ -0,0 +1 @@ +../../sherpa-onnx/csrc/alsa.h \ No newline at end of file diff --git a/c-api-examples/asr-microphone-example/c-api-alsa.cc b/c-api-examples/asr-microphone-example/c-api-alsa.cc new file mode 100644 index 000000000..8326462b2 --- /dev/null +++ b/c-api-examples/asr-microphone-example/c-api-alsa.cc @@ -0,0 +1,254 @@ +// c-api-examples/asr-microphone-example/c-api-alsa.cc +// Copyright (c) 2022-2024 Xiaomi Corporation + +#include +#include +#include +#include + +#include +#include // std::tolower +#include +#include + +#include "c-api-examples/asr-microphone-example/alsa.h" + +// NOTE: You don't need to use cargs.h in your own project. 
+// We use it in this file to parse commandline arguments +#include "cargs.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static struct cag_option options[] = { + {.identifier = 'h', + .access_letters = "h", + .access_name = "help", + .description = "Show help"}, + {.identifier = 't', + .access_letters = NULL, + .access_name = "tokens", + .value_name = "tokens", + .description = "Tokens file"}, + {.identifier = 'e', + .access_letters = NULL, + .access_name = "encoder", + .value_name = "encoder", + .description = "Encoder ONNX file"}, + {.identifier = 'd', + .access_letters = NULL, + .access_name = "decoder", + .value_name = "decoder", + .description = "Decoder ONNX file"}, + {.identifier = 'j', + .access_letters = NULL, + .access_name = "joiner", + .value_name = "joiner", + .description = "Joiner ONNX file"}, + {.identifier = 'n', + .access_letters = NULL, + .access_name = "num-threads", + .value_name = "num-threads", + .description = "Number of threads"}, + {.identifier = 'p', + .access_letters = NULL, + .access_name = "provider", + .value_name = "provider", + .description = "Provider: cpu (default), cuda, coreml"}, + {.identifier = 'm', + .access_letters = NULL, + .access_name = "decoding-method", + .value_name = "decoding-method", + .description = + "Decoding method: greedy_search (default), modified_beam_search"}, + {.identifier = 'f', + .access_letters = NULL, + .access_name = "hotwords-file", + .value_name = "hotwords-file", + .description = "The file containing hotwords, one words/phrases per line, " + "and for each phrase the bpe/cjkchar are separated by a " + "space. For example: ▁HE LL O ▁WORLD, 你 好 世 界"}, + {.identifier = 's', + .access_letters = NULL, + .access_name = "hotwords-score", + .value_name = "hotwords-score", + .description = "The bonus score for each token in hotwords. 
Used only " + "when decoding_method is modified_beam_search"}, +}; + +const char *kUsage = + R"( +Usage: + ./bin/c-api-alsa \ + --tokens=/path/to/tokens.txt \ + --encoder=/path/to/encoder.onnx \ + --decoder=/path/to/decoder.onnx \ + --joiner=/path/to/decoder.onnx \ + device_name + +The device name specifies which microphone to use in case there are several +on your system. You can use + + arecord -l + +to find all available microphones on your computer. For instance, if it outputs + +**** List of CAPTURE Hardware Devices **** +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] + Subdevices: 1/1 + Subdevice #0: subdevice #0 + +and if you want to select card 3 and the device 0 on that card, please use: + + plughw:3,0 + +as the device_name. +)"; + +bool stop = false; + +static void Handler(int sig) { + stop = true; + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n"); +} + +int32_t main(int32_t argc, char *argv[]) { + if (argc < 6) { + fprintf(stderr, "%s\n", kUsage); + exit(0); + } + + signal(SIGINT, Handler); + + SherpaOnnxOnlineRecognizerConfig config; + memset(&config, 0, sizeof(config)); + + config.model_config.debug = 0; + config.model_config.num_threads = 1; + config.model_config.provider = "cpu"; + + config.decoding_method = "greedy_search"; + + config.max_active_paths = 4; + + config.feat_config.sample_rate = 16000; + config.feat_config.feature_dim = 80; + + config.enable_endpoint = 1; + config.rule1_min_trailing_silence = 2.4; + config.rule2_min_trailing_silence = 1.2; + config.rule3_min_utterance_length = 300; + + cag_option_context context; + char identifier; + const char *value; + + cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv); + + while (cag_option_fetch(&context)) { + identifier = cag_option_get(&context); + value = cag_option_get_value(&context); + switch (identifier) { + case 't': + config.model_config.tokens = value; + break; + case 'e': + config.model_config.transducer.encoder = value; + break; + case 
'd': + config.model_config.transducer.decoder = value; + break; + case 'j': + config.model_config.transducer.joiner = value; + break; + case 'n': + config.model_config.num_threads = atoi(value); + break; + case 'p': + config.model_config.provider = value; + break; + case 'm': + config.decoding_method = value; + break; + case 'f': + config.hotwords_file = value; + break; + case 's': + config.hotwords_score = atof(value); + break; + case 'h': { + fprintf(stderr, "%s\n", kUsage); + exit(0); + break; + } + default: + // do nothing as config already has valid default values + break; + } + } + + SherpaOnnxOnlineRecognizer *recognizer = CreateOnlineRecognizer(&config); + SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer); + + SherpaOnnxDisplay *display = CreateDisplay(50); + int32_t segment_id = 0; + + const char *device_name = argv[context.index]; + sherpa_onnx::Alsa alsa(device_name); + fprintf(stderr, "Use recording device: %s\n", device_name); + fprintf(stderr, + "Please \033[32m\033[1mspeak\033[0m! 
Press \033[31m\033[1mCtrl + " + "C\033[0m to exit\n"); + + int32_t expected_sample_rate = 16000; + + if (alsa.GetExpectedSampleRate() != expected_sample_rate) { + fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(), + expected_sample_rate); + exit(-1); + } + + int32_t chunk = 0.1 * alsa.GetActualSampleRate(); + + std::string last_text; + + int32_t segment_index = 0; + + while (!stop) { + const std::vector &samples = alsa.Read(chunk); + AcceptWaveform(stream, expected_sample_rate, samples.data(), + samples.size()); + while (IsOnlineStreamReady(recognizer, stream)) { + DecodeOnlineStream(recognizer, stream); + } + + const SherpaOnnxOnlineRecognizerResult *r = + GetOnlineStreamResult(recognizer, stream); + + std::string text = r->text; + DestroyOnlineRecognizerResult(r); + + if (!text.empty() && last_text != text) { + last_text = text; + + std::transform(text.begin(), text.end(), text.begin(), + [](auto c) { return std::tolower(c); }); + + SherpaOnnxPrint(display, segment_index, text.c_str()); + fflush(stderr); + } + + if (IsEndpoint(recognizer, stream)) { + if (!text.empty()) { + ++segment_index; + } + Reset(recognizer, stream); + } + } + + // free allocated resources + DestroyDisplay(display); + DestroyOnlineStream(stream); + DestroyOnlineRecognizer(recognizer); + fprintf(stderr, "\n"); + + return 0; +} diff --git a/c-api-examples/decode-file-c-api.c b/c-api-examples/decode-file-c-api.c index 542cab9c8..46cb11a81 100644 --- a/c-api-examples/decode-file-c-api.c +++ b/c-api-examples/decode-file-c-api.c @@ -157,7 +157,7 @@ int32_t main(int32_t argc, char *argv[]) { break; } default: - // do nothing as config already have valid default values + // do nothing as config already has valid default values break; } } diff --git a/cmake/onnxruntime-linux-aarch64.cmake b/cmake/onnxruntime-linux-aarch64.cmake index 371afd007..b0e09de10 100644 --- a/cmake/onnxruntime-linux-aarch64.cmake +++ b/cmake/onnxruntime-linux-aarch64.cmake @@ -14,19 +14,19 @@ if(NOT 
BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-aarch64-1.17.1.tgz") -set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-aarch64-1.17.1.tgz") -set(onnxruntime_HASH "SHA256=70b6f536bb7ab5961d128e9dbd192368ac1513bffb74fe92f97aac342fbd0ac1") +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip") +set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip") +set(onnxruntime_HASH "SHA256=2ed01996da79d11ea486f738010bd411096ab91e744306fbd30d09f37e6d43a0") # If you don't have access to the Internet, # please download onnxruntime to one of the following locations. # You can add more if you want. 
set(possible_file_locations - $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-1.17.1.tgz - ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-1.17.1.tgz - ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-1.17.1.tgz - /tmp/onnxruntime-linux-aarch64-1.17.1.tgz - /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-1.17.1.tgz + $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip + ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip + ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip + /tmp/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip + /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1.zip ) foreach(f IN LISTS possible_file_locations) diff --git a/cmake/onnxruntime-linux-riscv64.cmake b/cmake/onnxruntime-linux-riscv64.cmake index f84b63687..c773e5ecb 100644 --- a/cmake/onnxruntime-linux-riscv64.cmake +++ b/cmake/onnxruntime-linux-riscv64.cmake @@ -14,19 +14,19 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. 
BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-1.18.0.zip") -set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-1.18.0.zip") -set(onnxruntime_HASH "SHA256=81a11b54d1d71f4b3161b00cba8576a07594abd218aa5c0d82382960ada06092") +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip") +set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip") +set(onnxruntime_HASH "SHA256=c2cbc5af081ff82f46640befd85433811486daaf28e702163c6e4e75020fde81") # If you don't have access to the Internet, # please download onnxruntime to one of the following locations. # You can add more if you want. set(possible_file_locations - $ENV{HOME}/Downloads/onnxruntime-linux-riscv64-1.18.0.zip - ${CMAKE_SOURCE_DIR}/onnxruntime-linux-riscv64-1.18.0.zip - ${CMAKE_BINARY_DIR}/onnxruntime-linux-riscv64-1.18.0.zip - /tmp/onnxruntime-linux-riscv64-1.18.0.zip - /star-fj/fangjun/download/github/onnxruntime-linux-riscv64-1.18.0.zip + $ENV{HOME}/Downloads/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip + ${CMAKE_SOURCE_DIR}/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip + ${CMAKE_BINARY_DIR}/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip + /tmp/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip + /star-fj/fangjun/download/github/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip ) foreach(f IN LISTS possible_file_locations) @@ -65,7 +65,7 @@ add_library(onnxruntime SHARED IMPORTED) set_target_properties(onnxruntime PROPERTIES IMPORTED_LOCATION ${location_onnxruntime} - INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/onnxruntime" + INTERFACE_INCLUDE_DIRECTORIES 
"${onnxruntime_SOURCE_DIR}/include/" ) file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*") diff --git a/cmake/onnxruntime-linux-x86_64.cmake b/cmake/onnxruntime-linux-x86_64.cmake index e460e5720..87e4268fe 100644 --- a/cmake/onnxruntime-linux-x86_64.cmake +++ b/cmake/onnxruntime-linux-x86_64.cmake @@ -14,19 +14,19 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz") -set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz") -set(onnxruntime_HASH "SHA256=89b153af88746665909c758a06797175ae366280cbf25502c41eb5955f9a555e") +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip") +set(onnxruntime_URL2 "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip") +set(onnxruntime_HASH "SHA256=3cfa5c2c5c21a9401572af5a4cd9d15ed8f6524f10d3b80e5a38676b3a31efe0") # If you don't have access to the Internet, # please download onnxruntime to one of the following locations. # You can add more if you want. 
set(possible_file_locations - $ENV{HOME}/Downloads/onnxruntime-linux-x64-1.17.1.tgz - ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-1.17.1.tgz - ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-1.17.1.tgz - /tmp/onnxruntime-linux-x64-1.17.1.tgz - /star-fj/fangjun/download/github/onnxruntime-linux-x64-1.17.1.tgz + $ENV{HOME}/Downloads/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip + ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip + ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip + /tmp/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip + /star-fj/fangjun/download/github/onnxruntime-linux-x64-glibc2_17-Release-1.17.1.zip ) foreach(f IN LISTS possible_file_locations) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 9dda2b148..fe2992ed0 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -117,67 +117,69 @@ function(download_onnxruntime) set(onnxruntime_SOURCE_DIR ${onnxruntime_SOURCE_DIR} PARENT_SCOPE) endfunction() -# First, we try to locate the header and the lib if the use has already -# installed onnxruntime. Otherwise, we will download the pre-compiled lib +if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE) + # First, we try to locate the header and the lib if the user has already + # installed onnxruntime. 
Otherwise, we will download the pre-compiled lib -message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") -message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") - -if(DEFINED ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR}) - set(location_onnxruntime_header_dir $ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR}) - - include_directories(${location_onnxruntime_header_dir}) -else() - find_path(location_onnxruntime_header_dir onnxruntime_cxx_api.h - PATHS - /usr/include - /usr/local/include - ) -endif() + message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") + message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") -message(STATUS "location_onnxruntime_header_dir: ${location_onnxruntime_header_dir}") + if(DEFINED ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR}) + set(location_onnxruntime_header_dir $ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR}) -if(DEFINED ENV{SHERPA_ONNXRUNTIME_LIB_DIR}) - if(APPLE) - set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib) + include_directories(${location_onnxruntime_header_dir}) else() - set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so) + find_path(location_onnxruntime_header_dir onnxruntime_cxx_api.h + PATHS + /usr/include + /usr/local/include + ) endif() - if(NOT EXISTS ${location_onnxruntime_lib}) - set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a) + + message(STATUS "location_onnxruntime_header_dir: ${location_onnxruntime_header_dir}") + + if(DEFINED ENV{SHERPA_ONNXRUNTIME_LIB_DIR}) + if(APPLE) + set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib) + else() + set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so) + endif() if(NOT EXISTS ${location_onnxruntime_lib}) - message(FATAL_ERROR "${location_onnxruntime_lib} cannot be found") + set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a) + if(NOT EXISTS ${location_onnxruntime_lib}) + message(FATAL_ERROR 
"${location_onnxruntime_lib} cannot be found") + endif() + set(onnxruntime_lib_files $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a) + message("Use static lib: ${onnxruntime_lib_files}") endif() - set(onnxruntime_lib_files $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a) - message("Use static lib: ${onnxruntime_lib_files}") - endif() - if(SHERPA_ONNX_ENABLE_GPU) - set(location_onnxruntime_cuda_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime_providers_cuda.so) - if(NOT EXISTS ${location_onnxruntime_cuda_lib}) - set(location_onnxruntime_cuda_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime_providers_cuda.a) + if(SHERPA_ONNX_ENABLE_GPU) + set(location_onnxruntime_cuda_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime_providers_cuda.so) + if(NOT EXISTS ${location_onnxruntime_cuda_lib}) + set(location_onnxruntime_cuda_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime_providers_cuda.a) + endif() endif() - endif() -else() - find_library(location_onnxruntime_lib onnxruntime - PATHS - /lib - /usr/lib - /usr/local/lib - ) - - if(SHERPA_ONNX_ENABLE_GPU) - find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda + else() + find_library(location_onnxruntime_lib onnxruntime PATHS /lib /usr/lib /usr/local/lib ) + + if(SHERPA_ONNX_ENABLE_GPU) + find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda + PATHS + /lib + /usr/lib + /usr/local/lib + ) + endif() endif() -endif() -message(STATUS "location_onnxruntime_lib: ${location_onnxruntime_lib}") -if(SHERPA_ONNX_ENABLE_GPU) - message(STATUS "location_onnxruntime_cuda_lib: ${location_onnxruntime_cuda_lib}") + message(STATUS "location_onnxruntime_lib: ${location_onnxruntime_lib}") + if(SHERPA_ONNX_ENABLE_GPU) + message(STATUS "location_onnxruntime_cuda_lib: ${location_onnxruntime_cuda_lib}") + endif() endif() if(location_onnxruntime_header_dir AND location_onnxruntime_lib) @@ -195,6 +197,10 @@ if(location_onnxruntime_header_dir AND location_onnxruntime_lib) endif() endif() else() - 
message(STATUS "Could not find a pre-installed onnxruntime. Downloading pre-compiled onnxruntime") + if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE) + message(STATUS "Could not find a pre-installed onnxruntime.") + endif() + message(STATUS "Downloading pre-compiled onnxruntime") + download_onnxruntime() endif() diff --git a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj index 0d3be225f..89d699d26 100644 --- a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj +++ b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj @@ -40,7 +40,7 @@ /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ - C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.16.3/onnxruntime.xcframework"; sourceTree = ""; }; + C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = ""; }; C93989B12A89FF78009AB859 /* decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = decoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; sourceTree = ""; }; C93989B22A89FF78009AB859 /* encoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = encoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; sourceTree = ""; }; C93989B32A89FF78009AB859 /* tokens.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = tokens.txt; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; sourceTree = ""; }; diff --git 
a/ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.pbxproj b/ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.pbxproj index 649e9a770..84082e889 100644 --- a/ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.pbxproj +++ b/ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.pbxproj @@ -30,7 +30,7 @@ C9A2588D2AAF039D00E555CA /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extension.swift; sourceTree = ""; }; C9A258922AAF057E00E555CA /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = ""; }; C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = ""; }; - C9A258972AAF05E400E555CA /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.16.3/onnxruntime.xcframework"; sourceTree = ""; }; + C9A258972AAF05E400E555CA /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ diff --git a/ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.pbxproj b/ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.pbxproj index a1f8eeffd..6e6908ff7 100644 --- a/ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.pbxproj +++ b/ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.pbxproj @@ -36,7 +36,7 @@ DE081AAE2ABFF35400E8CD63 /* UTType.swift */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UTType.swift; sourceTree = ""; }; DE081AB02ABFFEEE00E8CD63 /* Document.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Document.swift; sourceTree = ""; }; DE081AB22ABFFF2600E8CD63 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = ""; }; - DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.16.3/onnxruntime.xcframework"; sourceTree = ""; }; + DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = ""; }; DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = ""; }; DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechSegment.swift; sourceTree = ""; }; DEA22DEE2AC1796C00549373 /* tiny.en-encoder.int8.onnx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "tiny.en-encoder.int8.onnx"; sourceTree = ""; }; diff --git a/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj b/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj index 8f75463f4..e9ea2c213 100644 --- a/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj +++ b/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj @@ -26,7 +26,7 @@ C9FE9FE42B0F33CD009F1003 /* ViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
ViewModel.swift; sourceTree = ""; }; C9FE9FE62B0F3620009F1003 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = ""; }; C9FE9FE92B0F3754009F1003 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = ""; }; - C9FE9FEB2B0F3785009F1003 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.16.3/onnxruntime.xcframework"; sourceTree = ""; }; + C9FE9FEB2B0F3785009F1003 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ diff --git a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py new file mode 100755 index 000000000..45962755f --- /dev/null +++ b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 + +# Real-time speech recognition from a microphone with sherpa-onnx Python API +# with endpoint detection. +# +# Note: This script uses ALSA and works only on Linux systems, especially +# for embedded Linux systems and for running Linux on Windows using WSL.
+# +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +# to download pre-trained models + +import argparse +import sys +from pathlib import Path +import sherpa_onnx + + +def assert_file_exists(filename: str): + assert Path(filename).is_file(), ( + f"{filename} does not exist!\n" + "Please refer to " + "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it" + ) + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--tokens", + type=str, + required=True, + help="Path to tokens.txt", + ) + + parser.add_argument( + "--encoder", + type=str, + required=True, + help="Path to the encoder model", + ) + + parser.add_argument( + "--decoder", + type=str, + required=True, + help="Path to the decoder model", + ) + + parser.add_argument( + "--joiner", + type=str, + required=True, + help="Path to the joiner model", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="Valid values are greedy_search and modified_beam_search", + ) + + parser.add_argument( + "--provider", + type=str, + default="cpu", + help="Valid values: cpu, cuda, coreml", + ) + + parser.add_argument( + "--hotwords-file", + type=str, + default="", + help=""" + The file containing hotwords, one words/phrases per line, and for each + phrase the bpe/cjkchar are separated by a space. For example: + + ▁HE LL O ▁WORLD + 你 好 世 界 + """, + ) + + parser.add_argument( + "--hotwords-score", + type=float, + default=1.5, + help=""" + The hotword score of each token for biasing word/phrase. Used only if + --hotwords-file is given. + """, + ) + + parser.add_argument( + "--blank-penalty", + type=float, + default=0.0, + help=""" + The penalty applied on blank symbol during decoding. 
+ Note: It is a positive value that would be applied to logits like + this `logits[:, 0] -= blank_penalty` (suppose logits.shape is + [batch_size, vocab] and blank id is 0). + """, + ) + + parser.add_argument( + "--device-name", + type=str, + required=True, + help=""" +The device name specifies which microphone to use in case there are several +on your system. You can use + + arecord -l + +to find all available microphones on your computer. For instance, if it outputs + +**** List of CAPTURE Hardware Devices **** +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] + Subdevices: 1/1 + Subdevice #0: subdevice #0 + +and if you want to select card 3 and the device 0 on that card, please use: + + plughw:3,0 + +as the device_name. + """, + ) + + return parser.parse_args() + + +def create_recognizer(args): + assert_file_exists(args.encoder) + assert_file_exists(args.decoder) + assert_file_exists(args.joiner) + assert_file_exists(args.tokens) + # Please replace the model files if needed. + # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + # for download links. + recognizer = sherpa_onnx.OnlineRecognizer.from_transducer( + tokens=args.tokens, + encoder=args.encoder, + decoder=args.decoder, + joiner=args.joiner, + num_threads=1, + sample_rate=16000, + feature_dim=80, + enable_endpoint_detection=True, + rule1_min_trailing_silence=2.4, + rule2_min_trailing_silence=1.2, + rule3_min_utterance_length=300, # it essentially disables this rule + decoding_method=args.decoding_method, + provider=args.provider, + hotwords_file=args.hotwords_file, + hotwords_score=args.hotwords_score, + blank_penalty=args.blank_penalty, + ) + return recognizer + + +def main(): + args = get_args() + device_name = args.device_name + print(f"device_name: {device_name}") + alsa = sherpa_onnx.Alsa(device_name) + + print("Creating recognizer") + recognizer = create_recognizer(args) + print("Started! 
Please speak") + + sample_rate = 16000 + samples_per_read = int(0.1 * sample_rate) # 0.1 second = 100 ms + + stream = recognizer.create_stream() + + last_result = "" + segment_id = 0 + while True: + samples = alsa.read(samples_per_read) # a blocking read + stream.accept_waveform(sample_rate, samples) + while recognizer.is_ready(stream): + recognizer.decode_stream(stream) + + is_endpoint = recognizer.is_endpoint(stream) + + result = recognizer.get_result(stream) + + if result and (last_result != result): + last_result = result + print("\r{}:{}".format(segment_id, result), end="", flush=True) + if is_endpoint: + if result: + print("\r{}:{}".format(segment_id, result), flush=True) + segment_id += 1 + recognizer.reset(stream) + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\nCaught Ctrl + C. Exiting") diff --git a/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc b/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc index e37ba63d4..5357974df 100644 --- a/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc +++ b/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc @@ -173,7 +173,7 @@ void OnlineTransducerModifiedBeamSearchDecoder::Decode( new_hyp.num_trailing_blanks = 0; if (ss != nullptr && ss[b]->GetContextGraph() != nullptr) { auto context_res = ss[b]->GetContextGraph()->ForwardOneStep( - context_state, new_token); + context_state, new_token, false /*strict mode*/); context_score = std::get<0>(context_res); new_hyp.context_state = std::get<1>(context_res); } diff --git a/sherpa-onnx/csrc/session.cc b/sherpa-onnx/csrc/session.cc index 94987ebc9..759c66dd4 100644 --- a/sherpa-onnx/csrc/session.cc +++ b/sherpa-onnx/csrc/session.cc @@ -16,7 +16,7 @@ #endif #if __ANDROID_API__ >= 27 -#include "nnapi_provider_factory.h" +#include "nnapi_provider_factory.h" // NOLINT #endif namespace sherpa_onnx { diff --git a/sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc 
b/sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc index 5ecf99a5b..76695d5cf 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc @@ -276,8 +276,8 @@ as the device_name. } } - using namespace std::chrono_literals; - std::this_thread::sleep_for(20ms); // sleep for 20ms + using namespace std::chrono_literals; // NOLINT + std::this_thread::sleep_for(20ms); // sleep for 20ms } t.join(); diff --git a/sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc b/sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc index dc17230c6..2f24a21a6 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc @@ -192,8 +192,8 @@ as the device_name. } } - using namespace std::chrono_literals; - std::this_thread::sleep_for(20ms); // sleep for 20ms + using namespace std::chrono_literals; // NOLINT + std::this_thread::sleep_for(20ms); // sleep for 20ms } t.join(); t2.join(); diff --git a/sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc b/sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc index ab61eb87c..2e784ebb8 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc @@ -53,10 +53,6 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] and if you want to select card 3 and the device 0 on that card, please use: - hw:3,0 - -or - plughw:3,0 as the device_name. 
diff --git a/sherpa-onnx/jni/jni.cc b/sherpa-onnx/jni/jni.cc index a8f0ef4a3..1dbf96a7d 100644 --- a/sherpa-onnx/jni/jni.cc +++ b/sherpa-onnx/jni/jni.cc @@ -616,6 +616,22 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { ans.model_config.whisper.decoder = p; env->ReleaseStringUTFChars(s, p); + fid = env->GetFieldID(whisper_config_cls, "language", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(whisper_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model_config.whisper.language = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(whisper_config_cls, "task", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(whisper_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.model_config.whisper.task = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(whisper_config_cls, "tailPaddings", "I"); + ans.model_config.whisper.tail_paddings = env->GetIntField(whisper_config, + fid); + return ans; } diff --git a/sherpa-onnx/python/csrc/CMakeLists.txt b/sherpa-onnx/python/csrc/CMakeLists.txt index 30f646216..bba7903a0 100644 --- a/sherpa-onnx/python/csrc/CMakeLists.txt +++ b/sherpa-onnx/python/csrc/CMakeLists.txt @@ -1,6 +1,6 @@ include_directories(${CMAKE_SOURCE_DIR}) -pybind11_add_module(_sherpa_onnx +set(srcs circular-buffer.cc display.cc endpoint.cc @@ -37,6 +37,13 @@ pybind11_add_module(_sherpa_onnx vad-model.cc voice-activity-detector.cc ) +if(SHERPA_ONNX_HAS_ALSA) + list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-onnx/csrc/alsa.cc alsa.cc) +else() + list(APPEND srcs faked-alsa.cc) +endif() + +pybind11_add_module(_sherpa_onnx ${srcs}) if(APPLE) execute_process( @@ -54,6 +61,14 @@ endif() target_link_libraries(_sherpa_onnx PRIVATE sherpa-onnx-core) +if(SHERPA_ONNX_HAS_ALSA) + if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR}) + target_link_libraries(_sherpa_onnx PRIVATE -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound) + else() + target_link_libraries(_sherpa_onnx PRIVATE asound) + endif() +endif() 
+ install(TARGETS _sherpa_onnx DESTINATION ../ ) diff --git a/sherpa-onnx/python/csrc/alsa.cc b/sherpa-onnx/python/csrc/alsa.cc new file mode 100644 index 000000000..b6f752fa2 --- /dev/null +++ b/sherpa-onnx/python/csrc/alsa.cc @@ -0,0 +1,30 @@ +// sherpa-onnx/python/csrc/alsa.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include "sherpa-onnx/python/csrc/alsa.h" + +#include + +#include "sherpa-onnx/csrc/alsa.h" + +namespace sherpa_onnx { + +void PybindAlsa(py::module *m) { + using PyClass = Alsa; + py::class_(*m, "Alsa") + .def(py::init(), py::arg("device_name"), + py::call_guard()) + .def( + "read", + [](PyClass &self, int32_t num_samples) -> std::vector { + return self.Read(num_samples); + }, + py::arg("num_samples"), py::call_guard()) + .def_property_readonly("expected_sample_rate", + &PyClass::GetExpectedSampleRate) + .def_property_readonly("actual_sample_rate", + &PyClass::GetActualSampleRate); +} + +} // namespace sherpa_onnx diff --git a/sherpa-onnx/python/csrc/alsa.h b/sherpa-onnx/python/csrc/alsa.h new file mode 100644 index 000000000..e0106c12c --- /dev/null +++ b/sherpa-onnx/python/csrc/alsa.h @@ -0,0 +1,16 @@ +// sherpa-onnx/python/csrc/alsa.h +// +// Copyright (c) 2024 Xiaomi Corporation + +#ifndef SHERPA_ONNX_PYTHON_CSRC_ALSA_H_ +#define SHERPA_ONNX_PYTHON_CSRC_ALSA_H_ + +#include "sherpa-onnx/python/csrc/sherpa-onnx.h" + +namespace sherpa_onnx { + +void PybindAlsa(py::module *m); + +} // namespace sherpa_onnx + +#endif // SHERPA_ONNX_PYTHON_CSRC_ALSA_H_ diff --git a/sherpa-onnx/python/csrc/faked-alsa.cc b/sherpa-onnx/python/csrc/faked-alsa.cc new file mode 100644 index 000000000..26ce28fff --- /dev/null +++ b/sherpa-onnx/python/csrc/faked-alsa.cc @@ -0,0 +1,45 @@ +// sherpa-onnx/python/csrc/faked-alsa.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/python/csrc/alsa.h" + +namespace sherpa_onnx { + +class FakedAlsa { + public: + explicit FakedAlsa(const char *) { + 
SHERPA_ONNX_LOGE("This function is for Linux only."); +#if (SHERPA_ONNX_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix)) + SHERPA_ONNX_LOGE(R"doc( +sherpa-onnx is compiled without alsa support. To enable that, please run + (1) sudo apt-get install alsa-utils libasound2-dev + (2) rebuild sherpa-onnx +)doc"); +#endif + exit(-1); + } + + std::vector Read(int32_t) const { return {}; } + int32_t GetExpectedSampleRate() const { return -1; } + int32_t GetActualSampleRate() const { return -1; } +}; + +void PybindAlsa(py::module *m) { + using PyClass = FakedAlsa; + py::class_(*m, "Alsa") + .def(py::init(), py::arg("device_name")) + .def( + "read", + [](PyClass &self, int32_t num_samples) -> std::vector { + return self.Read(num_samples); + }, + py::arg("num_samples"), py::call_guard()) + .def_property_readonly("expected_sample_rate", + &PyClass::GetExpectedSampleRate) + .def_property_readonly("actual_sample_rate", + &PyClass::GetActualSampleRate); +} + +} // namespace sherpa_onnx diff --git a/sherpa-onnx/python/csrc/sherpa-onnx.cc b/sherpa-onnx/python/csrc/sherpa-onnx.cc index bdc38bbe9..7b0d7c0a0 100644 --- a/sherpa-onnx/python/csrc/sherpa-onnx.cc +++ b/sherpa-onnx/python/csrc/sherpa-onnx.cc @@ -4,6 +4,7 @@ #include "sherpa-onnx/python/csrc/sherpa-onnx.h" +#include "sherpa-onnx/python/csrc/alsa.h" #include "sherpa-onnx/python/csrc/circular-buffer.h" #include "sherpa-onnx/python/csrc/display.h" #include "sherpa-onnx/python/csrc/endpoint.h" @@ -54,6 +55,8 @@ PYBIND11_MODULE(_sherpa_onnx, m) { PybindOfflineTts(&m); PybindSpeakerEmbeddingExtractor(&m); PybindSpeakerEmbeddingManager(&m); + + PybindAlsa(&m); } } // namespace sherpa_onnx diff --git a/sherpa-onnx/python/sherpa_onnx/__init__.py b/sherpa-onnx/python/sherpa_onnx/__init__.py index 926edbb8f..ee22bd432 100644 --- a/sherpa-onnx/python/sherpa_onnx/__init__.py +++ b/sherpa-onnx/python/sherpa_onnx/__init__.py @@ -1,4 +1,5 @@ from _sherpa_onnx import ( + Alsa, CircularBuffer, Display, OfflineStream, From 
a29c6ff428d4775ba74236da8a0e27a989bd3693 Mon Sep 17 00:00:00 2001 From: lovemefan Date: Mon, 11 Mar 2024 13:59:39 +0800 Subject: [PATCH 17/18] remove duplicate code when merge pr #642 --- sherpa-onnx/c-api/c-api.cc | 190 +------------------------------------ 1 file changed, 3 insertions(+), 187 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index c40b5bd72..e4bd4776c 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -411,189 +411,6 @@ void DestroyOfflineRecognizerResult( } } -// ============================================================ -// For Keyword Spot -// ============================================================ - -struct SherpaOnnxKeywordSpotter { - std::unique_ptr impl; -}; - -SherpaOnnxKeywordSpotter* CreateKeywordSpotter( - const SherpaOnnxKeywordSpotterConfig* config) { - sherpa_onnx::KeywordSpotterConfig spotter_config; - - spotter_config.feat_config.sampling_rate = - SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); - spotter_config.feat_config.feature_dim = - SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); - - spotter_config.model_config.transducer.encoder = - SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); - spotter_config.model_config.transducer.decoder = - SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); - spotter_config.model_config.transducer.joiner = - SHERPA_ONNX_OR(config->model_config.transducer.joiner, ""); - - spotter_config.model_config.paraformer.encoder = - SHERPA_ONNX_OR(config->model_config.paraformer.encoder, ""); - spotter_config.model_config.paraformer.decoder = - SHERPA_ONNX_OR(config->model_config.paraformer.decoder, ""); - - spotter_config.model_config.zipformer2_ctc.model = - SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, ""); - - spotter_config.model_config.tokens = - SHERPA_ONNX_OR(config->model_config.tokens, ""); - spotter_config.model_config.num_threads = - SHERPA_ONNX_OR(config->model_config.num_threads, 1); - 
spotter_config.model_config.provider = - SHERPA_ONNX_OR(config->model_config.provider, "cpu"); - spotter_config.model_config.model_type = - SHERPA_ONNX_OR(config->model_config.model_type, ""); - spotter_config.model_config.debug = - SHERPA_ONNX_OR(config->model_config.debug, 0); - - spotter_config.max_active_paths = - SHERPA_ONNX_OR(config->max_active_paths, 4); - - spotter_config.num_trailing_blanks = - SHERPA_ONNX_OR(config->num_trailing_blanks , 1); - - spotter_config.keywords_score = - SHERPA_ONNX_OR(config->keywords_score, 1.0); - - spotter_config.keywords_threshold = - SHERPA_ONNX_OR(config->keywords_threshold, 0.25); - - spotter_config.keywords_file = - SHERPA_ONNX_OR(config->keywords_file, ""); - - if (config->model_config.debug) { - SHERPA_ONNX_LOGE("%s\n", spotter_config.ToString().c_str()); - } - - if (!spotter_config.Validate()) { - SHERPA_ONNX_LOGE("Errors in config!"); - return nullptr; - } - - SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter; - - spotter->impl = - std::make_unique(spotter_config); - - return spotter; -} - -void DestroyKeywordSpotter(SherpaOnnxKeywordSpotter* spotter) { - delete spotter; -} - -SherpaOnnxOnlineStream* CreateKeywordStream( - const SherpaOnnxKeywordSpotter* spotter) { - SherpaOnnxOnlineStream* stream = - new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); - return stream; -} - -int32_t IsKeywordStreamReady( - SherpaOnnxKeywordSpotter* spotter, SherpaOnnxOnlineStream* stream) { - return spotter->impl->IsReady(stream->impl.get()); -} - -void DecodeKeywordStream(SherpaOnnxKeywordSpotter* spotter, - SherpaOnnxOnlineStream* stream) { - return spotter->impl->DecodeStream(stream->impl.get()); -} - -void DecodeMultipleKeywordStreams( - SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream **streams, - int32_t n) { - std::vector ss(n); - for (int32_t i = 0; i != n; ++i) { - ss[i] = streams[i]->impl.get(); - } - spotter->impl->DecodeStreams(ss.data(), n); -} - -const SherpaOnnxKeywordResult 
*GetKeywordResult( - SherpaOnnxKeywordSpotter *spotter, SherpaOnnxOnlineStream *stream) { - const sherpa_onnx::KeywordResult& result = - spotter->impl->GetResult(stream->impl.get()); - const auto &keyword = result.keyword; - - auto r = new SherpaOnnxKeywordResult; - memset(r, 0, sizeof(SherpaOnnxKeywordResult)); - - r->start_time = result.start_time; - - // copy keyword - r->keyword = new char[keyword.size() + 1]; - std::copy(keyword.begin(), keyword.end(), const_cast(r->keyword)); - const_cast(r->keyword)[keyword.size()] = 0; - - // copy json - const auto &json = result.AsJsonString(); - r->json = new char[json.size() + 1]; - std::copy(json.begin(), json.end(), const_cast(r->json)); - const_cast(r->json)[json.size()] = 0; - - // copy tokens - auto count = result.tokens.size(); - if (count > 0) { - size_t total_length = 0; - for (const auto &token : result.tokens) { - // +1 for the null character at the end of each token - total_length += token.size() + 1; - } - - r->count = count; - // Each word ends with nullptr - r->tokens = new char[total_length]; - memset(reinterpret_cast(const_cast(r->tokens)), 0, - total_length); - char **tokens_temp = new char *[r->count]; - int32_t pos = 0; - for (int32_t i = 0; i < r->count; ++i) { - tokens_temp[i] = const_cast(r->tokens) + pos; - memcpy(reinterpret_cast(const_cast(r->tokens + pos)), - result.tokens[i].c_str(), result.tokens[i].size()); - // +1 to move past the null character - pos += result.tokens[i].size() + 1; - } - r->tokens_arr = tokens_temp; - - if (!result.timestamps.empty()) { - r->timestamps = new float[result.timestamps.size()]; - std::copy(result.timestamps.begin(), result.timestamps.end(), - r->timestamps); - } else { - r->timestamps = nullptr; - } - - } else { - r->count = 0; - r->timestamps = nullptr; - r->tokens = nullptr; - r->tokens_arr = nullptr; - } - - return r; -} - -void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { - if (r) { - delete[] r->keyword; - delete[] r->json; - delete[] 
r->tokens; - delete[] r->tokens_arr; - delete[] r->timestamps; - delete r; - } -} - - // ============================================================ // For Keyword Spot // ============================================================ @@ -670,7 +487,7 @@ SherpaOnnxKeywordSpotter* CreateKeywordSpotter( SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter; spotter->impl = - std::make_unique(spotter_config); + std::make_unique(spotter_config); return spotter; } @@ -682,7 +499,7 @@ void DestroyKeywordSpotter(SherpaOnnxKeywordSpotter* spotter) { SherpaOnnxOnlineStream* CreateKeywordStream( const SherpaOnnxKeywordSpotter* spotter) { SherpaOnnxOnlineStream* stream = - new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); + new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); return stream; } @@ -701,7 +518,7 @@ void DecodeMultipleKeywordStreams( int32_t n) { std::vector ss(n); for (int32_t i = 0; i != n; ++i) { - ss[i] = streams[i]->impl.get(); + ss[i] = streams[i]->impl.get(); } spotter->impl->DecodeStreams(ss.data(), n); } @@ -782,7 +599,6 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) { } } - // ============================================================ // For VAD // ============================================================ From a3e940dbb23fb6a2d3168e406952b081937d0e2a Mon Sep 17 00:00:00 2001 From: lovemefan Date: Mon, 11 Mar 2024 17:11:36 +0800 Subject: [PATCH 18/18] only do not check the hotwords file when SHERPA_ONNX_ENABLE_WASM_KWS on --- sherpa-onnx/c-api/c-api.cc | 12 +++--------- sherpa-onnx/csrc/keyword-spotter.cc | 7 +++++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index e4bd4776c..d9886c64b 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -473,21 +473,15 @@ SherpaOnnxKeywordSpotter* CreateKeywordSpotter( SHERPA_ONNX_LOGE("%s\n", spotter_config.ToString().c_str()); } -#ifndef SHERPA_ONNX_ENABLE_WASM_KWS - // due 
to the limitations of the wasm file system, - // keywords file will be packaged into the sherpa-onnx-wasm-kws-main.data file - // Solution: take keyword_file variable is directly - // parsed as a string of keywords if (!spotter_config.Validate()) { SHERPA_ONNX_LOGE("Errors in config!"); return nullptr; } -#endif SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter; spotter->impl = - std::make_unique(spotter_config); + std::make_unique(spotter_config); return spotter; } @@ -499,7 +493,7 @@ void DestroyKeywordSpotter(SherpaOnnxKeywordSpotter* spotter) { SherpaOnnxOnlineStream* CreateKeywordStream( const SherpaOnnxKeywordSpotter* spotter) { SherpaOnnxOnlineStream* stream = - new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); + new SherpaOnnxOnlineStream(spotter->impl->CreateStream()); return stream; } @@ -518,7 +512,7 @@ void DecodeMultipleKeywordStreams( int32_t n) { std::vector ss(n); for (int32_t i = 0; i != n; ++i) { - ss[i] = streams[i]->impl.get(); + ss[i] = streams[i]->impl.get(); } spotter->impl->DecodeStreams(ss.data(), n); } diff --git a/sherpa-onnx/csrc/keyword-spotter.cc b/sherpa-onnx/csrc/keyword-spotter.cc index 342b8308f..274a7fddf 100644 --- a/sherpa-onnx/csrc/keyword-spotter.cc +++ b/sherpa-onnx/csrc/keyword-spotter.cc @@ -94,10 +94,17 @@ bool KeywordSpotterConfig::Validate() const { SHERPA_ONNX_LOGE("Please provide --keywords-file."); return false; } + +#ifndef SHERPA_ONNX_ENABLE_WASM_KWS + // due to the limitations of the wasm file system, + // keywords file will be packaged into the sherpa-onnx-wasm-kws-main.data file + // Solution: take keyword_file variable is directly + // parsed as a string of keywords if (!std::ifstream(keywords_file.c_str()).good()) { SHERPA_ONNX_LOGE("Keywords file %s does not exist.", keywords_file.c_str()); return false; } +#endif return model_config.Validate(); }