Skip to content

Commit

Permalink
Add C# API for SenseVoice models
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Jul 20, 2024
1 parent 25f0a10 commit ef9f074
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 13 deletions.
21 changes: 11 additions & 10 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,8 @@

cd dotnet-examples/

cd ./keyword-spotting-from-files
./run.sh

cd ../online-decode-files
./run-transducer-itn.sh
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
cd ./offline-decode-files
./run-sense-voice-ctc.sh
./run-paraformer-itn.sh
./run-telespeech-ctc.sh
./run-nemo-ctc.sh
Expand All @@ -21,6 +13,15 @@ cd ../offline-decode-files
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../keyword-spotting-from-files
./run.sh

cd ../online-decode-files
./run-transducer-itn.sh
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../vad-non-streaming-asr-paraformer
./run.sh

Expand Down
11 changes: 11 additions & 0 deletions dotnet-examples/offline-decode-files/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ class Options
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
public string TeleSpeechCtc { get; set; } = "";

[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
public string SenseVoiceModel { get; set; } = "";

[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
public int SenseVoiceUseItn { get; set; } = 1;

[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; } = 1;

Expand Down Expand Up @@ -225,6 +231,11 @@ private static void Run(Options options)
{
config.ModelConfig.Tdnn.Model = options.TdnnModel;
}
else if (!String.IsNullOrEmpty(options.SenseVoiceModel))
{
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
}
else
{
Console.WriteLine("Please provide a model");
Expand Down
14 changes: 14 additions & 0 deletions dotnet-examples/offline-decode-files/run-sense-voice-ctc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# Runs the offline-decode-files example with a SenseVoice model.
#
# If the model directory is not already present in the current directory,
# the model archive is downloaded from the sherpa-onnx GitHub releases,
# extracted, and the archive is removed. The example is then run on the
# zh.wav test file that ships inside the model directory.

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

model_dir=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17

if [ ! -d "./${model_dir}" ]; then
  # -f makes curl exit non-zero on an HTTP error (e.g. 404) instead of
  # saving the server's error page as the archive, so the failure is
  # reported at the download step rather than later by tar.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_dir}.tar.bz2"
  tar xvf "${model_dir}.tar.bz2"
  rm "${model_dir}.tar.bz2"
fi

dotnet run \
  --sense-voice-model="./${model_dir}/model.int8.onnx" \
  --tokens="./${model_dir}/tokens.txt" \
  --files "./${model_dir}/test_wavs/zh.wav"
7 changes: 4 additions & 3 deletions scripts/dotnet/OfflineModelConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public OfflineModelConfig()
ModelingUnit = "cjkchar";
BpeVocab = "";
TeleSpeechCtc = "";
SenseVoice = new OfflineSenseVoiceModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
Expand Down Expand Up @@ -51,7 +52,7 @@ public OfflineModelConfig()

[MarshalAs(UnmanagedType.LPStr)]
public string TeleSpeechCtc;
}


}
public OfflineSenseVoiceModelConfig SenseVoice;
}
}
24 changes: 24 additions & 0 deletions scripts/dotnet/OfflineSenseVoiceModelConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    /// <summary>
    /// Configuration for an offline SenseVoice model.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout and its string fields are marshaled
    /// as LPStr. NOTE(review): presumably this mirrors a native struct, so
    /// the declaration order of the fields should not be changed - confirm
    /// against the native header.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSenseVoiceModelConfig
    {
        /// <summary>
        /// Path to the model file. Defaults to an empty string.
        /// </summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        /// <summary>
        /// Language setting. Defaults to an empty string.
        /// NOTE(review): the accepted values are not visible here - confirm.
        /// </summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Language;

        /// <summary>
        /// Inverse text normalization switch. Defaults to 0; the example
        /// program's help text describes 1 as enabling it.
        /// </summary>
        public int UseInverseTextNormalization;

        /// <summary>
        /// Initializes every field to its default: empty strings and 0.
        /// </summary>
        public OfflineSenseVoiceModelConfig()
        {
            UseInverseTextNormalization = 0;
            Language = string.Empty;
            Model = string.Empty;
        }
    }
}

0 comments on commit ef9f074

Please sign in to comment.