21 commits
bbdfeaf
raw_python_script: Adding raw nnet training
vimalmanohar Sep 27, 2016
4c060c3
raw_python_script: Raw LSTM config
vimalmanohar Sep 27, 2016
185e031
raw-signal-v2: Adding steps/nnet3/tdnn/make_raw_configs.py
vimalmanohar Sep 29, 2016
851eb24
raw_python_script: Made raw and AM nnets training and configs similar
vimalmanohar Sep 29, 2016
23aa55c
raw_python_script: tdnn make_configs.py with support for raw nnet3
vimalmanohar Sep 29, 2016
d074e56
raw_python_script: Refactoring DNN training
vimalmanohar Sep 29, 2016
14db046
raw_python_script: Minor bug fixes
vimalmanohar Sep 30, 2016
0782aab
raw_python_script: Refactoring RNN and DNN scripts
vimalmanohar Oct 1, 2016
0712a32
Merging from master
vimalmanohar Oct 4, 2016
5b17a4c
raw_python_script: Addressed comments and made changes
vimalmanohar Oct 6, 2016
f73183f
raw_python_script: Missed variable renames
vimalmanohar Oct 6, 2016
167d909
raw_python_script: Changing module imports
vimalmanohar Oct 8, 2016
bb8a6db
added babel_multilang example dir for multilingual setting and added …
pegahgh Jul 12, 2016
0ebdb97
fixed small issue.
pegahgh Oct 15, 2016
db042bb
small fix.
pegahgh Oct 15, 2016
dce56c8
fixed issues with raw_configs.
pegahgh Oct 15, 2016
8dd3035
fixed incompatibility issues.
pegahgh Oct 17, 2016
5403426
fixed some old comments removed during rebase.
pegahgh Oct 17, 2016
2a2b761
added new prepare_lang_conf.sh with lang name as being named in Babel…
pegahgh Oct 17, 2016
fc5d62c
fixed small issues.
pegahgh Oct 17, 2016
9485ffc
fixed small typos.
pegahgh Dec 7, 2016
99 changes: 99 additions & 0 deletions egs/babel_multilang/s5/conf/common.fullLP
@@ -0,0 +1,99 @@
# BNF training parameters
bnf_num_hidden_layers=6
bottleneck_dim=42
bnf_hidden_layer_dim=2048
bnf_minibatch_size=512
bnf_init_learning_rate=0.008
bnf_final_learning_rate=0.0008
bnf_max_change=40
bnf_num_jobs=4
bnf_num_threads=1
bnf_mixup=10000
bnf_mpe_learning_rate=0.00009
bnf_mpe_last_layer_factor=0.1
bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the
# non-bottleneck system (which has 800)
bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the
# non-bottleneck system (which has 80000).
bnf_decode_acwt=0.066666


# DNN hybrid system training parameters
dnn_num_hidden_layers=4
dnn_input_dim=4000
dnn_output_dim=400
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_mixup=12000

dnn_mpe_learning_rate=0.00008
dnn_mpe_last_layer_factor=0.1
dnn_mpe_retroactive=true

bnf_every_nth_frame=2 # take every 2nd frame.
babel_type=full

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )

dnn_beam=16.0
dnn_lat_beam=8.5

icu_opt=(--use-icu true --icu-transform Any-Lower)

if [[ `hostname` == *.tacc.utexas.edu ]] ; then
  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
  sgmm_train_extra_opts=( )
  sgmm_group_extra_opts=( --num_iters 25 )
  sgmm_denlats_extra_opts=( --num-threads 2 )
  sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2")
  dnn_denlats_extra_opts=( --num-threads 2 )

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" )
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1)

  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
  dnn_parallel_opts="-l gpu=1"
else
  decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=0.7G")
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=2.75G" --cmd "queue.pl -l arch=*64 -l mem_free=3.0G,ram_free=3.0G")
  sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")
  sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=3.2G,ram_free=3.2G")
  dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
  dnn_parallel_opts="-l gpu=1"
  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
fi

icu_transform="Any-Lower"
case_insensitive=true


max_states=150000
wip=0.5


phoneme_mapping=

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
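
For orientation (not part of the diff): a Babel run script typically sources these conf files and splices the bash-array options directly into its command lines. A minimal sketch, with the decode script and the graph/data/exp paths chosen only for illustration:

. conf/common_vars.sh
. conf/common.fullLP

# Hypothetical decode call; directories below are placeholders.
steps/nnet2/decode.sh --beam $dnn_beam --lattice-beam $dnn_lat_beam \
  "${decode_extra_opts[@]}" \
  exp/tri6_nnet/graph data/dev10h exp/tri6_nnet/decode_dev10h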
104 changes: 104 additions & 0 deletions egs/babel_multilang/s5/conf/common.limitedLP
@@ -0,0 +1,104 @@
# BNF training parameters
bnf_num_hidden_layers=5
bottleneck_dim=42
bnf_hidden_layer_dim=1024
bnf_minibatch_size=512
bnf_init_learning_rate=0.008
bnf_final_learning_rate=0.0008
bnf_max_change=40
bnf_num_jobs=4
bnf_num_threads=1
bnf_mixup=5000
bnf_mpe_learning_rate=0.00009
bnf_mpe_last_layer_factor=0.1
bnf_num_gauss_ubm=500 # use fewer UBM Gaussians than the
# non-bottleneck system (which has 750)
bnf_num_gauss_sgmm=10000 # use fewer SGMM sub-states than the
# non-bottleneck system (which has 18000).
bnf_decode_acwt=0.066666


## DNN hybrid system training parameters
dnn_num_hidden_layers=3
dnn_input_dim=2000
dnn_output_dim=200
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_mixup=5000

dnn_mpe_learning_rate=0.00009
dnn_mpe_last_layer_factor=0.1
dnn_mpe_retroactive=true

bnf_every_nth_frame=1 # take all frames.
babel_type=limited

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )

dnn_beam=16.0
dnn_lat_beam=8.5

icu_opt=(--use-icu true --icu-transform Any-Lower)

# Semi-supervised examples options
dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --io-opts "-tc 5" )

if [[ `hostname` == *.tacc.utexas.edu ]] ; then
  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=( )
  sgmm_denlats_extra_opts=( --num-threads 1 )
  dnn_denlats_extra_opts=( --num-threads 1 )

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" )
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-pe smp 16" )

  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1)

  dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 )
else
  decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G")
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7.0G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G")
  sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G")
  sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G")
  dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G")

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
  dnn_parallel_opts="-l gpu=1"
  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")

  dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 )
fi

icu_transform="Any-Lower"
case_insensitive=true


max_states=150000
wip=0.5


phoneme_mapping=

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
21 changes: 21 additions & 0 deletions egs/babel_multilang/s5/conf/common_vars.sh
@@ -0,0 +1,21 @@
#keyword search default
glmFile=conf/glm
duptime=0.5
case_insensitive=false
use_pitch=true
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="-oov <unk>"
boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not
# having the option)... should test.
cer=0

# Declared here to make the definitions inside the language conf files more
# transparent.
declare -A dev10h_more_kwlists
declare -A dev2h_more_kwlists
declare -A eval_more_kwlists
declare -A shadow_more_kwlists

[ -f ./path.sh ] && . ./path.sh; # source the path.
[ -f ./cmd.sh ] && . ./cmd.sh; # source train and decode cmds.
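
The associative arrays above are declared empty here and are meant to be filled in by each language's conf file; a hypothetical fragment (keyword-list names and paths are invented for illustration) would look like:

dev10h_more_kwlists=(
  [kwlist2]=/path/to/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
  [kwlist3]=/path/to/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
)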
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/glm
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/lang
10 changes: 10 additions & 0 deletions egs/babel_multilang/s5/conf/mfcc.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=8000 # Babel data is sampled at 8kHz
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)


11 changes: 11 additions & 0 deletions egs/babel_multilang/s5/conf/mfcc_hires.conf
@@ -0,0 +1,11 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=8000 # Babel data is sampled at 8kHz
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
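
For reference, a config like this is normally handed to the MFCC wrapper script; a minimal sketch, with the data directory and job count used only as placeholders:

steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 16 --cmd "$train_cmd" \
  data/$lang/train_hires
steps/compute_cmvn_stats.sh data/$lang/train_hires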

1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/pitch.conf
@@ -0,0 +1 @@
--sample-frequency=8000
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/plp.conf
@@ -0,0 +1 @@
--sample-frequency=8000
10 changes: 10 additions & 0 deletions egs/babel_multilang/s5/conf/queue.conf
@@ -0,0 +1,10 @@
# configuration for the AWS cluster for WS'15.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=1 -q g.q@b* -l gpu=1
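
Roughly how these mappings behave (the log path and resource values below are only an example): a --mem or --gpu switch passed to queue.pl is matched against the corresponding "option" line and its flags are appended to the base qsub command.

queue.pl --config conf/queue.conf --mem 4G --gpu 1 exp/foo/log/train.log nnet3-train ...
# should expand to approximately:
#   qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -l mem_free=4G,ram_free=4G -q g.q@b* -l gpu=1 ...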
40 changes: 40 additions & 0 deletions egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
@@ -0,0 +1,40 @@
#!/bin/bash
# This script extracts iVectors using the global iVector extractor
# trained on all languages in the multilingual setup.

. ./cmd.sh
set -e
stage=1
train_set=train
global_extractor=exp/multi/nnet3/extractor
ivector_suffix=_gb

[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1

. conf/common_vars.sh || exit 1;

[ -f local.conf ] && . ./local.conf

. ./utils/parse_options.sh

lang=$1

mkdir -p nnet3

if [ $stage -le 8 ]; then
  # We extract iVectors on the ${train_set} data, which will be what we
  # train the system on.

  # Having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (the iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/$lang/${train_set}_hires data/$lang/${train_set}_max2_hires

  if [ ! -f exp/$lang/nnet3/ivectors_${train_set}${ivector_suffix}/ivector_online.scp ]; then
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 200 \
      data/$lang/${train_set}_max2_hires $global_extractor exp/$lang/nnet3/ivectors_${train_set}${ivector_suffix} || exit 1;
  fi

fi


exit 0;
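
A usage sketch for the script above (the Babel language-directory name is made up for the example):

local/nnet3/extract_ivector_lang.sh --train-set train \
  --global-extractor exp/multi/nnet3/extractor 101-cantonese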