diff --git a/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
new file mode 100755
index 00000000000..00a6edb8125
--- /dev/null
+++ b/egs/ami/s5b/local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (author: Daniel Povey)  Tony Robinson
+#           2017  Hainan Xu
+#           2017  Ke Li
+#           2017  Yiming Wang
+
+# This script is similar to rnnlm_lstm_tdnn_b.sh except that it adds backstitch training.
+
+# rnnlm/train_rnnlm.sh: best iteration (out of 18) was 17, linking it to final iteration.
+# rnnlm/train_rnnlm.sh: train/dev perplexity was 45.6 / 68.7.
+# Train objf: -651.50 -4.44 -4.26 -4.15 -4.08 -4.03 -4.00 -3.97 -3.94 -3.92 -3.90 -3.89 -3.88 -3.86 -3.85 -3.84 -3.83 -3.82
+# Dev objf:   -10.76 -4.68 -4.47 -4.38 -4.33 -4.29 -4.28 -4.27 -4.26 -4.26 -4.25 -4.24 -4.24 -4.24 -4.23 -4.23 -4.23 -4.23
+
+# Begin configuration section.
+cmd=run.pl
+affix=1a
+embedding_dim=200
+embedding_l2=0.005  # embedding-layer l2 regularization
+comp_l2=0.005       # component-level l2 regularization
+output_l2=0.005     # output-layer l2 regularization
+epochs=90
+mic=sdm1
+stage=-10
+train_stage=0
+# backstitch options
+alpha=0.8        # backstitch training scale
+back_interval=1  # backstitch training interval
+
+. utils/parse_options.sh
+train=data/$mic/train/text
+dev=data/$mic/dev/text
+wordlist=data/lang/words.txt
+text_dir=data/rnnlm/text
+dir=exp/rnnlm_lstm_tdnn_bs_$affix
+mkdir -p $dir/config
+set -e
+
+for f in $train $dev $wordlist; do
+  [ ! -f $f ] && \
+    echo "$0: expected file $f to exist; search for run.sh and utils/prepare_lang.sh in run.sh" && exit 1
+done
+
+if [ $stage -le 0 ]; then
+  mkdir -p $text_dir
+  cat $train | cut -d ' ' -f2- > $text_dir/ami.txt
+  cat $dev | cut -d ' ' -f2- > $text_dir/dev.txt
+fi
+
+if [ $stage -le 1 ]; then
+  cp $wordlist $dir/config/
+  n=`cat $dir/config/words.txt | wc -l`
+  echo "<brk> $n" >> $dir/config/words.txt
+
+  # Words that are not in words.txt but appear in the training or dev data
+  # will be mapped to <unk> during training.
+  echo "<unk>" >$dir/config/oov.txt
+
+  cat > $dir/config/data_weights.txt <<EOF
+ami   1   1.0
+EOF
+
+  rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \
+                             --unk-word="<unk>" \
+                             --data-weights-file=$dir/config/data_weights.txt \
+                             $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
+
+  # choose features
+  rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
+                           --use-constant-feature=true \
+                           --top-word-features 10000 \
+                           --min-frequency 1.0e-03 \
+                           --special-words='<s>,</s>,<brk>,<unk>,[noise],[laughter]' \
+                           $dir/config/words.txt > $dir/config/features.txt
+
+  lstm_opts="l2-regularize=$comp_l2"
+  tdnn_opts="l2-regularize=$comp_l2"
+  output_opts="l2-regularize=$output_l2"
+
+  cat >$dir/config/xconfig <<EOF
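
A minimal invocation sketch (an assumption, not part of the diff; the option values simply restate the script's defaults, and the data paths are presumed to have been created by the main egs/ami/s5b/run.sh):

    # run from egs/ami/s5b after run.sh has produced data/sdm1/{train,dev}/text
    # and data/lang/words.txt
    local/rnnlm/tuning/run_lstm_tdnn_bs_1a.sh --mic sdm1 --stage 0 \
      --alpha 0.8 --back-interval 1

utils/parse_options.sh maps each "--option value" pair onto the shell variable of the same name (hyphens become underscores), so the backstitch scale (alpha) and interval (back_interval) can be overridden without editing the script. Roughly, backstitch training modifies each SGD update: every back_interval minibatches it first takes a step of scale -alpha times the learning rate and then a step of scale (1 + alpha) on the same minibatch, which acts as a regularizer during training.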