forked from PaddlePaddle/PaddleSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request PaddlePaddle#2138 from zh794390558/demos
[demos] more cli for speech demos
- Loading branch information
Showing
21 changed files
with
159 additions
and
13 deletions.
There are no files selected for viewing
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,26 @@ | ||
#!/bin/bash | ||
|
||
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav | ||
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav | ||
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav | ||
|
||
# asr | ||
paddlespeech asr --input ./zh.wav | ||
|
||
|
||
# asr + punc | ||
paddlespeech asr --input ./zh.wav | paddlespeech text --task punc | ||
|
||
|
||
# asr help | ||
paddlespeech asr --help | ||
|
||
|
||
# english asr | ||
paddlespeech asr --lang en --model transformer_librispeech --input ./en.wav | ||
|
||
# model stats | ||
paddlespeech stats --task asr | ||
|
||
|
||
# paddlespeech help | ||
paddlespeech --help |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
#!/bin/bash | ||
|
||
paddlespeech_server start --config_file ./conf/application.yaml | ||
paddlespeech_server start --config_file ./conf/application.yaml &> server.log & |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash | ||
|
||
wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav | ||
wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav | ||
|
||
# sid extract | ||
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav | ||
|
||
# sid score | ||
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task score --enroll ./85236145389.wav --test ./123456789.wav |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
|
||
paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭 |
Empty file.
File renamed without changes.
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,8 @@ | ||
export CUDA_VISIBLE_DEVICE=0,1,2,3 | ||
export CUDA_VISIBLE_DEVICE=0,1,2,3 | ||
#export CUDA_VISIBLE_DEVICE=0,1,2,3 | ||
|
||
# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 & | ||
# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 & | ||
paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log & | ||
|
||
# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 & | ||
# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 & | ||
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log & | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
demos/streaming_tts_server/conf/tts_online_ws_application.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# This is the parameter configuration file for streaming tts server. | ||
|
||
################################################################################# | ||
# SERVER SETTING # | ||
################################################################################# | ||
host: 0.0.0.0 | ||
port: 8192 | ||
|
||
# The task format in the engine_list is: <speech task>_<engine type> | ||
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online. | ||
# protocol choices = ['websocket', 'http'] | ||
protocol: 'websocket' | ||
engine_list: ['tts_online-onnx'] | ||
|
||
|
||
################################################################################# | ||
# ENGINE CONFIG # | ||
################################################################################# | ||
|
||
################################### TTS ######################################### | ||
################### speech task: tts; engine_type: online ####################### | ||
tts_online: | ||
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc'] | ||
# fastspeech2_cnndecoder_csmsc supports streaming am inference. | ||
am: 'fastspeech2_csmsc' | ||
am_config: | ||
am_ckpt: | ||
am_stat: | ||
phones_dict: | ||
tones_dict: | ||
speaker_dict: | ||
spk_id: 0 | ||
|
||
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc'] | ||
# Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference | ||
voc: 'mb_melgan_csmsc' | ||
voc_config: | ||
voc_ckpt: | ||
voc_stat: | ||
|
||
# others | ||
lang: 'zh' | ||
device: 'cpu' # set 'gpu:id' or 'cpu' | ||
# am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc model for streaming am inference; | ||
# when am_pad is set to 12, the streaming synthetic audio is the same as the non-streaming synthetic audio. | ||
am_block: 72 | ||
am_pad: 12 | ||
# voc_block and voc_pad are used by the voc model for streaming voc inference. | ||
# When the voc model is mb_melgan_csmsc, setting voc_pad to 14 makes the streaming synthetic audio the same as the non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal. | ||
# When the voc model is hifigan_csmsc, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14 the streaming synthetic audio still sounds normal. | ||
voc_block: 36 | ||
voc_pad: 14 | ||
|
||
|
||
|
||
################################################################################# | ||
# ENGINE CONFIG # | ||
################################################################################# | ||
|
||
################################### TTS ######################################### | ||
################### speech task: tts; engine_type: online-onnx ####################### | ||
tts_online-onnx: | ||
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx'] | ||
# fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference. | ||
am: 'fastspeech2_cnndecoder_csmsc_onnx' | ||
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model]; | ||
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model]; | ||
am_ckpt: # list | ||
am_stat: | ||
phones_dict: | ||
tones_dict: | ||
speaker_dict: | ||
spk_id: 0 | ||
am_sample_rate: 24000 | ||
am_sess_conf: | ||
device: "cpu" # set 'gpu:id' or 'cpu' | ||
use_trt: False | ||
cpu_threads: 4 | ||
|
||
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx'] | ||
# Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference | ||
voc: 'hifigan_csmsc_onnx' | ||
voc_ckpt: | ||
voc_sample_rate: 24000 | ||
voc_sess_conf: | ||
device: "cpu" # set 'gpu:id' or 'cpu' | ||
use_trt: False | ||
cpu_threads: 4 | ||
|
||
# others | ||
lang: 'zh' | ||
# am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc_onnx model for streaming am inference; | ||
# when am_pad is set to 12, the streaming synthetic audio is the same as the non-streaming synthetic audio. | ||
am_block: 72 | ||
am_pad: 12 | ||
# voc_block and voc_pad are used by the voc model for streaming voc inference. | ||
# When the voc model is mb_melgan_csmsc_onnx, setting voc_pad to 14 makes the streaming synthetic audio the same as the non-streaming synthetic audio; voc_pad can be set as low as 7 and the streaming synthetic audio still sounds normal. | ||
# When the voc model is hifigan_csmsc_onnx, setting voc_pad to 19 makes the streaming synthetic audio the same as the non-streaming synthetic audio; with voc_pad set to 14 the streaming synthetic audio still sounds normal. | ||
voc_block: 36 | ||
voc_pad: 14 | ||
# voc_upsample should be the same as n_shift in the voc config. | ||
voc_upsample: 300 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash | ||
|
||
# http server | ||
paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log & | ||
|
||
|
||
# websocket server | ||
paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log & | ||
|
||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters