From cddac52780d587444d70b72d7ae4d2d7dfdf1312 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 18 Sep 2024 11:03:42 +0800 Subject: [PATCH] Support passing utf-8 strings from JavaScript to C++. (#1355) We first convert utf-16 strings to Uint8Array and then we pass the array to C++. --- nodejs-addon-examples/package.json | 2 +- .../test_asr_non_streaming_sense_voice.js | 15 ++++++++++ scripts/node-addon-api/src/macros.h | 29 ++++++++++++------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/nodejs-addon-examples/package.json b/nodejs-addon-examples/package.json index 121a8103e..41bcfb8fc 100644 --- a/nodejs-addon-examples/package.json +++ b/nodejs-addon-examples/package.json @@ -1,5 +1,5 @@ { "dependencies": { - "sherpa-onnx-node": "^1.10.26" + "sherpa-onnx-node": "^1.10.27" } } diff --git a/nodejs-addon-examples/test_asr_non_streaming_sense_voice.js b/nodejs-addon-examples/test_asr_non_streaming_sense_voice.js index 99371e8f3..116024ebb 100644 --- a/nodejs-addon-examples/test_asr_non_streaming_sense_voice.js +++ b/nodejs-addon-examples/test_asr_non_streaming_sense_voice.js @@ -3,6 +3,19 @@ const sherpa_onnx = require('sherpa-onnx-node'); // Please download test files from // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + + +// If your path contains non-ascii characters, e.g., Chinese, you can use +// the following code +// + +// let encoder = new TextEncoder(); +// let tokens = encoder.encode( +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt'); +// let model = encoder.encode( +// './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx'); + + const config = { 'featConfig': { 'sampleRate': 16000, @@ -12,9 +25,11 @@ const config = { 'senseVoice': { 'model': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx', + // 'model': model, 'useInverseTextNormalization': 1, }, 'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt', + // 'tokens': tokens, 'numThreads': 2, 'provider': 'cpu', 'debug': 1, diff --git a/scripts/node-addon-api/src/macros.h b/scripts/node-addon-api/src/macros.h index bed930620..ac0dbd567 100644 --- a/scripts/node-addon-api/src/macros.h +++ b/scripts/node-addon-api/src/macros.h @@ -7,17 +7,24 @@ #include #include -#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \ - do { \ - if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \ - Napi::String _str = o.Get(#js_name).As(); \ - std::string s = _str.Utf8Value(); \ - char *p = new char[s.size() + 1]; \ - std::copy(s.begin(), s.end(), p); \ - p[s.size()] = 0; \ - \ - c.c_name = p; \ - } \ +#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \ + do { \ + if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \ + Napi::String _str = o.Get(#js_name).As(); \ + std::string s = _str.Utf8Value(); \ + char *p = new char[s.size() + 1]; \ + std::copy(s.begin(), s.end(), p); \ + p[s.size()] = 0; \ + \ + c.c_name = p; \ + } else if (o.Has(#js_name) && o.Get(#js_name).IsTypedArray()) { \ + Napi::Uint8Array _array = o.Get(#js_name).As(); \ + char *p = new char[_array.ElementLength() + 1]; \ + std::copy(_array.Data(), _array.Data() + _array.ElementLength(), p); \ + p[_array.ElementLength()] = '\0'; \ + \ + c.c_name = p; \ + } \ } while (0) #define SHERPA_ONNX_ASSIGN_ATTR_INT32(c_name, js_name) \