Sprak nnet3 #1402
@@ -1,25 +1,28 @@
%WER 49.19 [ 5318 / 10811, 481 ins, 1511 del, 3326 sub ] exp/mono0a/decode_3g_test1k/wer_9
%WER 47.28 [ 5111 / 10811, 443 ins, 1489 del, 3179 sub ] exp/mono0a/decode_b3g_test1k/wer_10
%WER 16.19 [ 1750 / 10811, 397 ins, 323 del, 1030 sub ] exp/sgmm2_5a/decode_3g_test1k/wer_9
%WER 15.10 [ 1632 / 10811, 404 ins, 305 del, 923 sub ] exp/sgmm2_5b/decode_3g_test1k/wer_9
%WER 14.94 [ 1615 / 10811, 390 ins, 310 del, 915 sub ] exp/sgmm2_5b/decode_4g_test1k/wer_9
%WER 14.36 [ 1553 / 10811, 376 ins, 264 del, 913 sub ] exp/sgmm2_5c/decode_3g_test1k/wer_9
%WER 14.18 [ 1533 / 10811, 367 ins, 266 del, 900 sub ] exp/sgmm2_5c/decode_4g_test1k/wer_9
%WER 25.61 [ 2769 / 10811, 511 ins, 539 del, 1719 sub ] exp/tri1/decode_3g_test1k/wer_10
%WER 25.12 [ 2716 / 10811, 444 ins, 571 del, 1701 sub ] exp/tri1/decode_b3g_test1k/wer_11
%WER 23.81 [ 2574 / 10811, 426 ins, 564 del, 1584 sub ] exp/tri2a/decode_3g_test1k/wer_12
%WER 23.22 [ 2510 / 10811, 457 ins, 517 del, 1536 sub ] exp/tri2a/decode_3g_test1k_fromlats/wer_11
%WER 22.18 [ 2398 / 10811, 436 ins, 495 del, 1467 sub ] exp/tri2b/decode_3g_test1k/wer_11
%WER 21.87 [ 2364 / 10811, 380 ins, 553 del, 1431 sub ] exp/tri2b/decode_3g_test1k_mbr/wer_13
%WER 18.98 [ 2052 / 10811, 451 ins, 372 del, 1229 sub ] exp/tri3b_20k/decode_3g_test1k/wer_11
%WER 22.62 [ 2445 / 10811, 468 ins, 460 del, 1517 sub ] exp/tri3b_20k/decode_3g_test1k.si/wer_10
%WER 19.31 [ 2088 / 10811, 440 ins, 388 del, 1260 sub ] exp/tri3b/decode_3g_test1k/wer_11
%WER 23.19 [ 2507 / 10811, 435 ins, 520 del, 1552 sub ] exp/tri3b/decode_3g_test1k.si/wer_12
%WER 19.06 [ 2061 / 10811, 427 ins, 384 del, 1250 sub ] exp/tri3b/decode_4g_test1k/wer_11
%WER 23.20 [ 2508 / 10811, 447 ins, 520 del, 1541 sub ] exp/tri3b/decode_4g_test1k.si/wer_11
%WER 17.42 [ 1883 / 10811, 416 ins, 359 del, 1108 sub ] exp/tri4a/decode_3g_test1k/wer_13
%WER 20.86 [ 2255 / 10811, 403 ins, 473 del, 1379 sub ] exp/tri4a/decode_3g_test1k.si/wer_13
%WER 17.52 [ 1894 / 10811, 396 ins, 372 del, 1126 sub ] exp/tri4b/decode_3g_test1k/wer_13
%WER 20.82 [ 2251 / 10811, 399 ins, 471 del, 1381 sub ] exp/tri4b/decode_3g_test1k.si/wer_13
%WER 17.53 [ 1895 / 10811, 403 ins, 375 del, 1117 sub ] exp/tri4b/decode_4g_test1k/wer_13
%WER 20.99 [ 2269 / 10811, 438 ins, 436 del, 1395 sub ] exp/tri4b/decode_4g_test1k.si/wer_11
GMM-based systems
%WER 22.87 [ 24286 / 106172, 3577 ins, 5321 del, 15388 sub ] exp/tri1/decode_fg_dev/wer_12_0.5
%WER 23.13 [ 24561 / 106172, 3602 ins, 5411 del, 15548 sub ] exp/tri1/decode_tg_dev/wer_12_0.5
%WER 21.24 [ 22548 / 106172, 4028 ins, 4246 del, 14274 sub ] exp/tri2a/decode_tg_dev/wer_13_0.0
%WER 19.46 [ 20664 / 106172, 3276 ins, 4332 del, 13056 sub ] exp/tri2b/decode_tg_dev/wer_15_0.5
%WER 16.80 [ 17839 / 106172, 3238 ins, 3403 del, 11198 sub ] exp/tri3b/decode_fg_dev/wer_17_0.0
%WER 19.45 [ 20651 / 106172, 3880 ins, 3671 del, 13100 sub ] exp/tri3b/decode_fg_dev.si/wer_15_0.0
%WER 14.24 [ 9849 / 69165, 2046 ins, 1365 del, 6438 sub ] exp/tri3b/decode_fg_test/wer_16_0.5
%WER 17.31 [ 11972 / 69165, 2330 ins, 1695 del, 7947 sub ] exp/tri3b/decode_fg_test.si/wer_15_0.5
%WER 16.94 [ 17984 / 106172, 3361 ins, 3377 del, 11246 sub ] exp/tri3b/decode_tg_dev/wer_16_0.0
%WER 19.52 [ 20720 / 106172, 3654 ins, 3846 del, 13220 sub ] exp/tri3b/decode_tg_dev.si/wer_17_0.0
%WER 14.40 [ 9957 / 69165, 2291 ins, 1184 del, 6482 sub ] exp/tri3b/decode_tg_test/wer_16_0.0
%WER 17.41 [ 12044 / 69165, 2291 ins, 1736 del, 8017 sub ] exp/tri3b/decode_tg_test.si/wer_15_0.5
nnet3 xent systems
%WER 11.57 [ 12279 / 106172, 2640 ins, 2442 del, 7197 sub ] exp/nnet3/tdnn0_sp/decode_dev/wer_10_0.0
%WER 9.89 [ 6841 / 69165, 1542 ins, 917 del, 4382 sub ] exp/nnet3/tdnn0_sp/decode_test/wer_11_0.5
%WER 10.45 [ 11098 / 106172, 2199 ins, 2272 del, 6627 sub ] exp/nnet3/lstm_0_ld5_sp/decode_dev/wer_9_0.0
%WER 12.34 [ 8533 / 69165, 1740 ins, 1393 del, 5400 sub ] exp/nnet3/lstm_0_ld5_sp/decode_test/wer_11_1.0
%WER 10.59 [ 11241 / 106172, 2208 ins, 2304 del, 6729 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_dev/wer_9_0.0
%WER 12.43 [ 8596 / 69165, 1742 ins, 1426 del, 5428 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_test/wer_11_1.0
%WER 9.18 [ 9747 / 106172, 1987 ins, 1913 del, 5847 sub ] exp/nnet3/lstm_bidirectional_sp/decode_dev/wer_8_0.0
Nnet3 chain systems
%WER 8.48 [ 9001 / 106172, 1559 ins, 1624 del, 5818 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_dev/wer_9_0.0
%WER 7.20 [ 4981 / 69165, 915 ins, 402 del, 3664 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_test/wer_8_1.0
%WER 10.00 [ 10619 / 106172, 1980 ins, 1896 del, 6743 sub ] exp/chain/tdnn_sp_bi/decode_dev/wer_9_0.0
%WER 8.58 [ 5936 / 69165, 1059 ins, 667 del, 4210 sub ] exp/chain/tdnn_sp_bi/decode_test/wer_9_1.0
%WER 9.39 [ 9969 / 106172, 1624 ins, 1912 del, 6433 sub ] exp/chain/lstm1e_sp_bi/decode_dev/wer_8_0.5
%WER 7.72 [ 5341 / 69165, 1002 ins, 497 del, 3842 sub ] exp/chain/lstm1e_sp_bi/decode_test/wer_8_0.5
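For reference, each %WER line reports total errors / total words followed by the insertion, deletion and substitution counts that make up those errors; the percentage is (ins + del + sub) / total words. For example, the best chain system above: (1559 + 1624 + 5818) / 106172 = 9001 / 106172 ≈ 8.48%. The trailing wer_N (or wer_N_M) in each path is the LM-weight (and word-insertion-penalty) setting at which that best WER was found.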
@@ -0,0 +1,11 @@
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated), which is why
# we prefer this method.
--use-energy=false   # use average of log energy, not energy.
--num-mel-bins=40    # similar to Google's setup.
--num-ceps=40        # there is no dimensionality reduction.
--low-freq=20        # low cutoff frequency for mel bins... this is high-bandwidth data, so
                     # there might be some information at the low end.
                     # Needs to be this low to be sensitive to creaky voice
--high-freq=-400     # high cutoff frequency, relative to Nyquist of 8000 (=7600)
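For context, a high-resolution config like this is typically passed to the MFCC extraction script when preparing features for nnet3 training. A minimal sketch, assuming a standard Kaldi recipe layout ($train_cmd comes from cmd.sh; the data-dir names are illustrative, not taken from this PR):

utils/copy_data_dir.sh data/train data/train_hires       # keep a separate hires copy of the data dir
steps/make_mfcc.sh --nj 30 --cmd "$train_cmd" \
  --mfcc-config conf/mfcc_hires.conf data/train_hires    # extract 40-dim MFCCs using this config
steps/compute_cmvn_stats.sh data/train_hires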
@@ -0,0 +1 @@
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
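This config is read by the apply-cmvn-online binary. Since local/run_online_decoding.sh is not part of this PR (see the review comments at the end), the sketch below is purely illustrative, with assumed paths for the global CMVN stats and features:

# hypothetical paths; only conf/online_cmvn.conf comes from this PR
apply-cmvn-online --config=conf/online_cmvn.conf \
  exp/nnet3/extractor/global_cmvn.stats \
  scp:data/dev_hires/feats.scp ark:- | feat-to-dim ark:- -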
@@ -0,0 +1,50 @@
#!/bin/bash

# Prints a table that makes it easy to compare WER and objective values across nnet3
# and chain training runs.

echo -n "System "
for x in "$@"; do printf "% 10s" $x; done
echo

echo -n "WER on dev(tg) "
for x in "$@"; do
  wer=$(grep WER ${x}/decode_dev/wer_* | utils/best_wer.sh | awk '{print $2}')
  printf "% 10s" $wer
done
echo

echo -n "WER on test(tg) "
for x in "$@"; do
  wer=$(grep WER ${x}/decode_test/wer_* | utils/best_wer.sh | awk '{print $2}')
  printf "% 10s" $wer
done
echo

echo -n "Final train prob "
for x in "$@"; do
  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo

echo -n "Final valid prob "
for x in "$@"; do
  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo

echo -n "Final train prob (xent) "
for x in "$@"; do
  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo

echo -n "Final valid prob (xent) "
for x in "$@"; do
  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo
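A typical invocation passes two or more experiment directories on the command line, e.g. using directories that appear in the RESULTS file above:

local/chain/compare_wer_general.sh exp/chain/tdnn_sp_bi exp/chain/tdnn_lstm1a_sp_bi

This prints one column per directory with the best WER found under decode_dev and decode_test, plus the final chain and cross-entropy train/valid probabilities taken from the training logs.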
@@ -0,0 +1 @@
tuning/run_lstm_1e.sh
@@ -0,0 +1 @@
tuning/run_tdnn_1b.sh
Contributor: It doesn't really matter, but best to use #!/bin/bash at the top here. Same for

Contributor: Also, you have

Contributor (Author): It wasn't in the tedlium folder I copied, but I'll add it.

Contributor: Actually no need to have the 'just-lstm' number, generally TDNN+LSTM will work better.
@@ -0,0 +1 @@
tuning/run_tdnn_lstm_1a.sh
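These one-line files are git symlinks; the diff shows each link's target as its content. A hypothetical sketch of how such a link is created (the local/chain location and the link name are inferred from the targets and from the "run_lstm.sh soft link" note in the script below, not confirmed by the diff):

cd local/chain   # assumed location within the recipe's s5 directory
ln -sf tuning/run_tdnn_lstm_1a.sh run_tdnn_lstm.sh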
@@ -0,0 +1,260 @@
#!/bin/bash

# run_lstm_1a.sh is a first attempt at an LSTM system, based on xconfigs-- it's
# probably not very well configured, e.g. the num-params might be too small.
# recurrent-projection-dim is less than non-recurrent-projection-dim due to an
# oversight.

# comparison with TDNN system (WER is worse):
# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/lstm1a_sp_bi
# System                   tdnn1b_sp_bi  lstm1a_sp_bi
# WER on dev(orig)             10.2          10.8
# WER on dev(rescored)          9.6          10.2
# WER on test(orig)             9.7          10.0
# WER on test(rescored)         9.2           9.6
# Final train prob          -0.0928       -0.0848
# Final valid prob          -0.1178       -0.1098
# Final train prob (xent)   -1.4666       -1.1692
# Final valid prob (xent)   -1.5473       -1.2520

## how you run this (note: this assumes that the run_lstm.sh soft link points here;
## otherwise call it directly in its location).
# by default, with cleanup:
# local/chain/run_lstm.sh

# without cleanup:
# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &

# note, if you have already run one of the non-chain nnet3 systems
# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.

# This script (run_lstm_1a) is like run_tdnn_1b.sh except modified to use an LSTM
# configuration (some aspects borrowed from egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh).

set -e -o pipefail

# First the options that are passed through to run_ivector_common.sh
# (some of which are also used in this script directly).
stage=0
nj=30
decode_nj=30
min_seg_len=1.55
chunk_left_context=40
chunk_right_context=0
label_delay=5
xent_regularize=0.1
train_set=train_cleaned
gmm=tri3_cleaned  # the gmm for the target data
num_threads_ubm=32
nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
# decode options
extra_left_context=50
extra_right_context=0
frames_per_chunk=150

# The rest are configs specific to this script. Most of the parameters
# are just hardcoded at this level, in the commands below.
train_stage=-10
tree_affix=   # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
lstm_affix=1a # affix for LSTM directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir=  # you can set this to use previously dumped egs.

# End configuration section.
echo "$0 $@"  # Print the command line for logging

. cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

local/nnet3/run_ivector_common.sh --stage $stage \
                                  --nj $nj \
                                  --min-seg-len $min_seg_len \
                                  --train-set $train_set \
                                  --gmm $gmm \
                                  --num-threads-ubm $num_threads_ubm \
                                  --nnet3-affix "$nnet3_affix"

gmm_dir=exp/$gmm
ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
dir=exp/chain${nnet3_affix}/lstm${lstm_affix}_sp_bi
train_data_dir=data/${train_set}_sp_hires_comb
lores_train_data_dir=data/${train_set}_sp_comb
train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb

for f in $gmm_dir/final.mdl $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
    $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz $gmm_dir/final.mdl; do
  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done

if [ $stage -le 14 ]; then
  echo "$0: creating lang directory with one state per phone."
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  if [ -d data/lang_chain ]; then
    if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then
      echo "$0: data/lang_chain already exists, not overwriting it; continuing"
    else
      echo "$0: data/lang_chain already exists and seems to be older than data/lang..."
      echo " ... not sure what to do. Exiting."
      exit 1;
    fi
  else
    cp -r data/lang data/lang_chain
    silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
    nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
    # Use our special topology... note that later on may have to tune this
    # topology.
    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
  fi
fi

if [ $stage -le 15 ]; then
  # Get the alignments as lattices (gives the chain training more freedom).
  # use the same num-jobs as the alignments
  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
    data/lang $gmm_dir $lat_dir
  rm $lat_dir/fsts.*.gz # save space
fi

if [ $stage -le 16 ]; then
  # Build a tree using our new topology. We know we have alignments for the
  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
  # those.
  if [ -f $tree_dir/final.mdl ]; then
    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
    exit 1;
  fi
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
      --context-opts "--context-width=2 --central-position=1" \
      --leftmost-questions-truncate -1 \
      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
fi

if [ $stage -le 17 ]; then
  mkdir -p $dir
  echo "$0: creating neural net configs using the xconfig parser";

  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
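  # e.g. with the default xent_regularize=0.1 set above, this evaluates to
  # 0.5/0.1 = 5.0, i.e. the xent output layer learns 5x faster than the chain output layer.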
  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=100 name=ivector
  input dim=40 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
  lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3
  lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3
  lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3

  ## adding the layers for chain branch
  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5

  # adding the layers for xent branch
  # This block prints the configs for a separate output that will be
  # trained with a cross-entropy objective in the 'chain' models... this
  # has the effect of regularizing the hidden parts of the model. we use
  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
  # 0.5 / args.xent_regularize is suitable as it means the xent
  # final-layer learns at a rate independent of the regularization
  # constant; and the 0.5 was tuned so as to make the relative progress
  # similar in the xent and regular final layers.
  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5

EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi

if [ $stage -le 18 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet3/chain/train.py --stage $train_stage \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir $train_ivector_dir \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --egs.dir "$common_egs_dir" \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width "$frames_per_chunk" \
    --egs.chunk-left-context "$chunk_left_context" \
    --egs.chunk-right-context "$chunk_right_context" \
    --trainer.num-chunk-per-minibatch 128 \
    --trainer.frames-per-iter 1500000 \
    --trainer.max-param-change 2.0 \
    --trainer.num-epochs 4 \
    --trainer.deriv-truncate-margin 10 \
    --trainer.optimization.shrink-value 0.99 \
    --trainer.optimization.num-jobs-initial 2 \
    --trainer.optimization.num-jobs-final 12 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.momentum 0.0 \
    --cleanup.remove-egs true \
    --feat-dir $train_data_dir \
    --tree-dir $tree_dir \
    --lat-dir $lat_dir \
    --dir $dir
fi

if [ $stage -le 19 ]; then
  # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
fi

if [ $stage -le 20 ]; then
  rm $dir/.error 2>/dev/null || true
  for dset in dev test; do
    (
      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
        --acwt 1.0 --post-decode-acwt 10.0 \
        --extra-left-context $extra_left_context \
        --extra-right-context $extra_right_context \
        --frames-per-chunk "$frames_per_chunk" \
        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
        --scoring-opts "--min-lmwt 5 " \
        $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
    ) || touch $dir/.error &
  done
  wait
  if [ -f $dir/.error ]; then
    echo "$0: something went wrong in decoding"
    exit 1
  fi
fi
exit 0
local/run_online_decoding.sh does not exist for this recipe. Maybe you accidentally copied this file from tedlium.

If it is unused, delete it.

This is the next thing I want to work on.