21 commits
bbdfeaf
raw_python_script: Adding raw nnet training
vimalmanohar Sep 27, 2016
4c060c3
raw_python_script: Raw LSTM config
vimalmanohar Sep 27, 2016
185e031
raw-signal-v2: Adding steps/nnet3/tdnn/make_raw_configs.py
vimalmanohar Sep 29, 2016
851eb24
raw_python_script: Made raw and AM nnets training and configs similar
vimalmanohar Sep 29, 2016
23aa55c
raw_python_script: tdnn make_configs.py with support for raw nnet3
vimalmanohar Sep 29, 2016
d074e56
raw_python_script: Refactoring DNN training
vimalmanohar Sep 29, 2016
14db046
raw_python_script: Minor bug fixes
vimalmanohar Sep 30, 2016
0782aab
raw_python_script: Refactoring RNN and DNN scripts
vimalmanohar Oct 1, 2016
0712a32
Merging from master
vimalmanohar Oct 4, 2016
5b17a4c
raw_python_script: Addressed comments and made changes
vimalmanohar Oct 6, 2016
f73183f
raw_python_script: Missed variable renames
vimalmanohar Oct 6, 2016
167d909
raw_python_script: Changing module imports
vimalmanohar Oct 8, 2016
bb8a6db
added babel_multilang example dir for multilingual setting and added …
pegahgh Jul 12, 2016
0ebdb97
fixed small issue.
pegahgh Oct 15, 2016
db042bb
small fix.
pegahgh Oct 15, 2016
dce56c8
fixed issues with raw_configs.
pegahgh Oct 15, 2016
8dd3035
fixed incompatibility issues.
pegahgh Oct 17, 2016
5403426
fixed some old comments removed during rebase.
pegahgh Oct 17, 2016
2a2b761
added new prepare_lang_conf.sh with lang name as being named in Babel…
pegahgh Oct 17, 2016
fc5d62c
fixed small issues.
pegahgh Oct 17, 2016
9485ffc
fixed small typos.
pegahgh Dec 7, 2016
99 changes: 99 additions & 0 deletions egs/babel_multilang/s5/conf/common.fullLP
@@ -0,0 +1,99 @@
# BNF training parameters
bnf_num_hidden_layers=6
bottleneck_dim=42
bnf_hidden_layer_dim=2048
bnf_minibatch_size=512
bnf_init_learning_rate=0.008
bnf_final_learning_rate=0.0008
bnf_max_change=40
bnf_num_jobs=4
bnf_num_threads=1
bnf_mixup=10000
bnf_mpe_learning_rate=0.00009
bnf_mpe_last_layer_factor=0.1
bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the
# non-bottleneck system (which has 800)
bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the
# non-bottleneck system (which has 80000).
bnf_decode_acwt=0.066666


# DNN hybrid system training parameters
dnn_num_hidden_layers=4
dnn_input_dim=4000
dnn_output_dim=400
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_mixup=12000

dnn_mpe_learning_rate=0.00008
dnn_mpe_last_layer_factor=0.1
dnn_mpe_retroactive=true

bnf_every_nth_frame=2 # take every 2nd frame.
babel_type=full

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )

dnn_beam=16.0
dnn_lat_beam=8.5

icu_opt=(--use-icu true --icu-transform Any-Lower)

if [[ `hostname` == *.tacc.utexas.edu ]] ; then
  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
  sgmm_train_extra_opts=( )
  sgmm_group_extra_opts=( --num_iters 25 )
  sgmm_denlats_extra_opts=( --num-threads 2 )
  sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2")
  dnn_denlats_extra_opts=( --num-threads 2 )

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" )
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1)

  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
  dnn_parallel_opts="-l gpu=1"
else
  decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=0.7G")
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=2.75G" --cmd "queue.pl -l arch=*64 -l mem_free=3.0G,ram_free=3.0G")
  sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")
  sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=3.2G,ram_free=3.2G")
  dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
  dnn_parallel_opts="-l gpu=1"
  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
fi

icu_transform="Any-Lower"
case_insensitive=true


max_states=150000
wip=0.5


phoneme_mapping=

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
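
For orientation (not part of the diff): a Babel run script typically sources these conf files and splices the bash-array options directly into its command lines. A minimal sketch, with the decode script and the graph/data/exp paths chosen only for illustration:

. conf/common_vars.sh
. conf/common.fullLP

# Hypothetical decode call; directories below are placeholders.
steps/nnet2/decode.sh --beam $dnn_beam --lattice-beam $dnn_lat_beam \
  "${decode_extra_opts[@]}" \
  exp/tri6_nnet/graph data/dev10h exp/tri6_nnet/decode_dev10h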
104 changes: 104 additions & 0 deletions egs/babel_multilang/s5/conf/common.limitedLP
@@ -0,0 +1,104 @@
# BNF training parameters
bnf_num_hidden_layers=5
bottleneck_dim=42
bnf_hidden_layer_dim=1024
bnf_minibatch_size=512
bnf_init_learning_rate=0.008
bnf_final_learning_rate=0.0008
bnf_max_change=40
bnf_num_jobs=4
bnf_num_threads=1
bnf_mixup=5000
bnf_mpe_learning_rate=0.00009
bnf_mpe_last_layer_factor=0.1
bnf_num_gauss_ubm=500 # use fewer UBM Gaussians than the
# non-bottleneck system (which has 750)
bnf_num_gauss_sgmm=10000 # use fewer SGMM sub-states than the
# non-bottleneck system (which has 18000).
bnf_decode_acwt=0.066666


## DNN hybrid system training parameters
dnn_num_hidden_layers=3
dnn_input_dim=2000
dnn_output_dim=200
dnn_init_learning_rate=0.008
dnn_final_learning_rate=0.0008
dnn_mixup=5000

dnn_mpe_learning_rate=0.00009
dnn_mpe_last_layer_factor=0.1
dnn_mpe_retroactive=true

bnf_every_nth_frame=1 # take all frames.
babel_type=limited

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )

dnn_beam=16.0
dnn_lat_beam=8.5

icu_opt=(--use-icu true --icu-transform Any-Lower)

# Semi-supervised examples options
dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --io-opts "-tc 5" )

if [[ `hostname` == *.tacc.utexas.edu ]] ; then
  decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" )
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=( )
  sgmm_denlats_extra_opts=( --num-threads 1 )
  dnn_denlats_extra_opts=( --num-threads 1 )

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" )
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-pe smp 16" )

  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1)

  dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 )
else
  decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G")
  sgmm_train_extra_opts=( --num-iters 25 )
  sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7.0G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G")
  sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G")
  sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G")
  dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G")

  dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
    --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
  dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
  dnn_parallel_opts="-l gpu=1"
  dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \
    --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")

  dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 )
fi

icu_transform="Any-Lower"
case_insensitive=true


max_states=150000
wip=0.5


phoneme_mapping=

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
21 changes: 21 additions & 0 deletions egs/babel_multilang/s5/conf/common_vars.sh
@@ -0,0 +1,21 @@
#keyword search default
glmFile=conf/glm
duptime=0.5
case_insensitive=false
use_pitch=true
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="-oov <unk>"
boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not
# having the option)... should test.
cer=0

# Declared here to make the definitions inside the language conf files more
# transparent.
declare -A dev10h_more_kwlists
declare -A dev2h_more_kwlists
declare -A eval_more_kwlists
declare -A shadow_more_kwlists

[ -f ./path.sh ] && . ./path.sh; # source the path.
[ -f ./cmd.sh ] && . ./cmd.sh; # source train and decode cmds.
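
The associative arrays above are declared empty here and are meant to be filled in by each language's conf file; a hypothetical fragment (keyword-list names and paths are invented for illustration) would look like:

dev10h_more_kwlists=(
  [kwlist2]=/path/to/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
  [kwlist3]=/path/to/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
)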
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/glm
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/lang
10 changes: 10 additions & 0 deletions egs/babel_multilang/s5/conf/mfcc.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=8000 # Babel data is sampled at 8kHz
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)


11 changes: 11 additions & 0 deletions egs/babel_multilang/s5/conf/mfcc_hires.conf
@@ -0,0 +1,11 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=8000 # Babel data is sampled at 8kHz
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
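
For reference, a config like this is normally handed to the MFCC wrapper script; a minimal sketch, with the data directory and job count used only as placeholders:

steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 16 --cmd "$train_cmd" \
  data/$lang/train_hires
steps/compute_cmvn_stats.sh data/$lang/train_hires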

1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/pitch.conf
@@ -0,0 +1 @@
--sample-frequency=8000
1 change: 1 addition & 0 deletions egs/babel_multilang/s5/conf/plp.conf
@@ -0,0 +1 @@
--sample-frequency=8000
10 changes: 10 additions & 0 deletions egs/babel_multilang/s5/conf/queue.conf
@@ -0,0 +1,10 @@
# configuration for the AWS cluster for WS'15.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=1 -q g.q@b* -l gpu=1
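
Roughly how these mappings behave (the log path and resource values below are only an example): a --mem or --gpu switch passed to queue.pl is matched against the corresponding "option" line and its flags are appended to the base qsub command.

queue.pl --config conf/queue.conf --mem 4G --gpu 1 exp/foo/log/train.log nnet3-train ...
# should expand to approximately:
#   qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -l mem_free=4G,ram_free=4G -q g.q@b* -l gpu=1 ...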
40 changes: 40 additions & 0 deletions egs/babel_multilang/s5/local/nnet3/extract_ivector_lang.sh
@@ -0,0 +1,40 @@
#!/bin/bash
# This script extracts iVectors using the global iVector extractor
# trained on all languages in the multilingual setup.

. ./cmd.sh
set -e
stage=1
train_set=train
global_extractor=exp/multi/nnet3/extractor
ivector_suffix=_gb

[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1

. conf/common_vars.sh || exit 1;

[ -f local.conf ] && . ./local.conf

. ./utils/parse_options.sh

lang=$1

mkdir -p nnet3

if [ $stage -le 8 ]; then
  # We extract iVectors on the ${train_set} data, which will be what we
  # train the system on.

  # Having a larger number of speakers is helpful for generalization, and to
  # handle per-utterance decoding well (the iVector starts at zero).
  steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/$lang/${train_set}_hires data/$lang/${train_set}_max2_hires

  if [ ! -f exp/$lang/nnet3/ivectors_${train_set}${ivector_suffix}/ivector_online.scp ]; then
    steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 200 \
      data/$lang/${train_set}_max2_hires $global_extractor exp/$lang/nnet3/ivectors_${train_set}${ivector_suffix} || exit 1;
  fi

fi


exit 0;
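
A usage sketch for the script above (the Babel language-directory name is made up for the example):

local/nnet3/extract_ivector_lang.sh --train-set train \
  --global-extractor exp/multi/nnet3/extractor 101-cantonese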