diff --git a/egs/librispeech/s5/RESULTS b/egs/librispeech/s5/RESULTS index ca3806bd168..6f175b3c97b 100644 --- a/egs/librispeech/s5/RESULTS +++ b/egs/librispeech/s5/RESULTS @@ -443,6 +443,92 @@ %WER 15.10 [ 7904 / 52343, 874 ins, 1070 del, 5960 sub ] exp/nnet3/tdnn_sp/decode_test_other_tgmed/wer_13_0.0 %WER 16.29 [ 8528 / 52343, 828 ins, 1320 del, 6380 sub ] exp/nnet3/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0 +# Results with nnet3 tdnn+sMBR +# local/nnet3/run_tdnn_discriminative.sh +# a subset of the full list of results (using the acoustic model obtained at the end of the training): +%WER 4.07 [ 2214 / 54402, 313 ins, 195 del, 1706 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_fglarge_epoch4.adj/wer_14_1.0 +%WER 4.20 [ 2286 / 54402, 337 ins, 206 del, 1743 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tglarge_epoch4.adj/wer_17_0.5 +%WER 5.19 [ 2825 / 54402, 319 ins, 328 del, 2178 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgmed_epoch4.adj/wer_17_0.5 +%WER 5.84 [ 3177 / 54402, 407 ins, 313 del, 2457 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgsmall_epoch4.adj/wer_15_0.0 +%WER 11.07 [ 5641 / 50948, 745 ins, 577 del, 4319 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_fglarge_epoch4.adj/wer_20_0.5 +%WER 11.43 [ 5821 / 50948, 782 ins, 603 del, 4436 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tglarge_epoch4.adj/wer_18_0.5 +%WER 13.45 [ 6853 / 50948, 873 ins, 759 del, 5221 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgmed_epoch4.adj/wer_19_0.0 +%WER 14.43 [ 7350 / 50948, 869 ins, 881 del, 5600 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgsmall_epoch4.adj/wer_18_0.0 +%WER 4.58 [ 2409 / 52576, 370 ins, 197 del, 1842 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_fglarge_epoch4.adj/wer_14_1.0 +%WER 4.75 [ 2496 / 52576, 408 ins, 200 del, 1888 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tglarge_epoch4.adj/wer_15_0.5 +%WER 5.72 [ 3005 / 52576, 397 ins, 325 del, 2283 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgmed_epoch4.adj/wer_17_0.5 +%WER 6.44 [ 3387 / 52576, 466 ins, 326 del, 2595 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgsmall_epoch4.adj/wer_15_0.0 +%WER 11.30 [ 5913 / 52343, 708 ins, 701 del, 4504 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_fglarge_epoch4.adj/wer_16_1.0 +%WER 11.71 [ 6128 / 52343, 798 ins, 633 del, 4697 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tglarge_epoch4.adj/wer_17_0.5 +%WER 13.72 [ 7179 / 52343, 853 ins, 783 del, 5543 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgmed_epoch4.adj/wer_17_0.0 +%WER 14.83 [ 7761 / 52343, 821 ins, 962 del, 5978 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgsmall_epoch4.adj/wer_18_0.0 + +# the full list of results +%WER 4.14 [ 2254 / 54402, 319 ins, 204 del, 1731 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_fglarge_epoch1.adj/wer_13_1.0 +%WER 4.12 [ 2243 / 54402, 306 ins, 214 del, 1723 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_fglarge_epoch2.adj/wer_15_1.0 +%WER 4.10 [ 2228 / 54402, 316 ins, 199 del, 1713 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_fglarge_epoch3.adj/wer_14_1.0 +%WER 4.07 [ 2214 / 54402, 313 ins, 195 del, 1706 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_fglarge_epoch4.adj/wer_14_1.0 +%WER 4.24 [ 2306 / 54402, 292 ins, 240 del, 1774 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tglarge_epoch1.adj/wer_15_1.0 +%WER 4.24 [ 2307 / 54402, 357 ins, 186 del, 1764 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tglarge_epoch2.adj/wer_14_0.5 +%WER 4.23 [ 2303 / 54402, 310 ins, 232 del, 1761 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tglarge_epoch3.adj/wer_16_1.0 +%WER 4.20 [ 2286 / 54402, 337 ins, 206 del, 1743 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tglarge_epoch4.adj/wer_17_0.5 +%WER 5.35 [ 2912 / 54402, 306 ins, 344 del, 2262 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgmed_epoch1.adj/wer_13_1.0 +%WER 5.24 [ 2850 / 54402, 295 ins, 342 del, 2213 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgmed_epoch2.adj/wer_14_1.0 +%WER 5.21 [ 2836 / 54402, 347 ins, 297 del, 2192 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgmed_epoch3.adj/wer_15_0.5 +%WER 5.19 [ 2825 / 54402, 319 ins, 328 del, 2178 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgmed_epoch4.adj/wer_17_0.5 +%WER 5.99 [ 3259 / 54402, 412 ins, 324 del, 2523 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgsmall_epoch1.adj/wer_14_0.0 +%WER 5.89 [ 3204 / 54402, 416 ins, 306 del, 2482 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgsmall_epoch2.adj/wer_14_0.0 +%WER 5.84 [ 3179 / 54402, 417 ins, 302 del, 2460 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgsmall_epoch3.adj/wer_14_0.0 +%WER 5.84 [ 3177 / 54402, 407 ins, 313 del, 2457 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_clean_tgsmall_epoch4.adj/wer_15_0.0 +%WER 10.98 [ 5593 / 50948, 849 ins, 480 del, 4264 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_fglarge_epoch1.adj/wer_16_0.0 +%WER 10.98 [ 5596 / 50948, 727 ins, 600 del, 4269 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_fglarge_epoch2.adj/wer_20_0.5 +%WER 10.94 [ 5576 / 50948, 752 ins, 555 del, 4269 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_fglarge_epoch3.adj/wer_18_0.5 +%WER 11.07 [ 5641 / 50948, 745 ins, 577 del, 4319 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_fglarge_epoch4.adj/wer_20_0.5 +%WER 11.44 [ 5826 / 50948, 764 ins, 619 del, 4443 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tglarge_epoch1.adj/wer_15_0.5 +%WER 11.32 [ 5766 / 50948, 775 ins, 595 del, 4396 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tglarge_epoch2.adj/wer_16_0.5 +%WER 11.28 [ 5749 / 50948, 759 ins, 615 del, 4375 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tglarge_epoch3.adj/wer_18_0.5 +%WER 11.43 [ 5821 / 50948, 782 ins, 603 del, 4436 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tglarge_epoch4.adj/wer_18_0.5 +%WER 13.51 [ 6885 / 50948, 838 ins, 803 del, 5244 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgmed_epoch1.adj/wer_17_0.0 +%WER 13.39 [ 6824 / 50948, 887 ins, 717 del, 5220 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgmed_epoch2.adj/wer_16_0.0 +%WER 13.35 [ 6803 / 50948, 894 ins, 730 del, 5179 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgmed_epoch3.adj/wer_17_0.0 +%WER 13.45 [ 6853 / 50948, 873 ins, 759 del, 5221 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgmed_epoch4.adj/wer_19_0.0 +%WER 14.63 [ 7452 / 50948, 831 ins, 946 del, 5675 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgsmall_epoch1.adj/wer_17_0.0 +%WER 14.52 [ 7397 / 50948, 857 ins, 907 del, 5633 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgsmall_epoch2.adj/wer_17_0.0 +%WER 14.40 [ 7338 / 50948, 853 ins, 901 del, 5584 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgsmall_epoch3.adj/wer_18_0.0 +%WER 14.43 [ 7350 / 50948, 869 ins, 881 del, 5600 sub ] exp/nnet3/tdnn_sp_smbr/decode_dev_other_tgsmall_epoch4.adj/wer_18_0.0 +%WER 4.63 [ 2432 / 52576, 349 ins, 239 del, 1844 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_fglarge_epoch1.adj/wer_14_1.0 +%WER 4.56 [ 2395 / 52576, 347 ins, 223 del, 1825 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_fglarge_epoch2.adj/wer_15_1.0 +%WER 4.56 [ 2397 / 52576, 361 ins, 209 del, 1827 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_fglarge_epoch3.adj/wer_14_1.0 +%WER 4.58 [ 2409 / 52576, 370 ins, 197 del, 1842 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_fglarge_epoch4.adj/wer_14_1.0 +%WER 4.82 [ 2535 / 52576, 406 ins, 227 del, 1902 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tglarge_epoch1.adj/wer_14_0.5 +%WER 4.73 [ 2486 / 52576, 404 ins, 208 del, 1874 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tglarge_epoch2.adj/wer_14_0.5 +%WER 4.73 [ 2489 / 52576, 355 ins, 250 del, 1884 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tglarge_epoch3.adj/wer_15_1.0 +%WER 4.75 [ 2496 / 52576, 408 ins, 200 del, 1888 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tglarge_epoch4.adj/wer_15_0.5 +%WER 5.78 [ 3040 / 52576, 412 ins, 295 del, 2333 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgmed_epoch1.adj/wer_13_0.5 +%WER 5.73 [ 3015 / 52576, 394 ins, 310 del, 2311 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgmed_epoch2.adj/wer_15_0.5 +%WER 5.76 [ 3027 / 52576, 404 ins, 296 del, 2327 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgmed_epoch3.adj/wer_14_0.5 +%WER 5.72 [ 3005 / 52576, 397 ins, 325 del, 2283 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgmed_epoch4.adj/wer_17_0.5 +%WER 6.55 [ 3443 / 52576, 454 ins, 360 del, 2629 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgsmall_epoch1.adj/wer_15_0.0 +%WER 6.47 [ 3402 / 52576, 456 ins, 339 del, 2607 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgsmall_epoch2.adj/wer_15_0.0 +%WER 6.45 [ 3389 / 52576, 456 ins, 335 del, 2598 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgsmall_epoch3.adj/wer_15_0.0 +%WER 6.44 [ 3387 / 52576, 466 ins, 326 del, 2595 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_clean_tgsmall_epoch4.adj/wer_15_0.0 +%WER 11.27 [ 5900 / 52343, 781 ins, 609 del, 4510 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_fglarge_epoch1.adj/wer_14_0.5 +%WER 11.14 [ 5832 / 52343, 762 ins, 621 del, 4449 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_fglarge_epoch2.adj/wer_16_0.5 +%WER 11.15 [ 5838 / 52343, 769 ins, 619 del, 4450 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_fglarge_epoch3.adj/wer_17_0.5 +%WER 11.30 [ 5913 / 52343, 708 ins, 701 del, 4504 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_fglarge_epoch4.adj/wer_16_1.0 +%WER 11.75 [ 6152 / 52343, 766 ins, 667 del, 4719 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tglarge_epoch1.adj/wer_15_0.5 +%WER 11.59 [ 6068 / 52343, 764 ins, 654 del, 4650 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tglarge_epoch2.adj/wer_17_0.5 +%WER 11.58 [ 6062 / 52343, 771 ins, 648 del, 4643 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tglarge_epoch3.adj/wer_18_0.5 +%WER 11.71 [ 6128 / 52343, 798 ins, 633 del, 4697 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tglarge_epoch4.adj/wer_17_0.5 +%WER 13.93 [ 7289 / 52343, 851 ins, 846 del, 5592 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgmed_epoch1.adj/wer_15_0.0 +%WER 13.77 [ 7209 / 52343, 816 ins, 830 del, 5563 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgmed_epoch2.adj/wer_17_0.0 +%WER 13.74 [ 7191 / 52343, 832 ins, 806 del, 5553 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgmed_epoch3.adj/wer_17_0.0 +%WER 13.72 [ 7179 / 52343, 853 ins, 783 del, 5543 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgmed_epoch4.adj/wer_17_0.0 +%WER 15.15 [ 7929 / 52343, 889 ins, 948 del, 6092 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgsmall_epoch1.adj/wer_14_0.0 +%WER 14.93 [ 7817 / 52343, 847 ins, 958 del, 6012 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgsmall_epoch2.adj/wer_16_0.0 +%WER 14.83 [ 7762 / 52343, 825 ins, 955 del, 5982 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgsmall_epoch3.adj/wer_17_0.0 +%WER 14.83 [ 7761 / 52343, 821 ins, 962 del, 5978 sub ] exp/nnet3/tdnn_sp_smbr/decode_test_other_tgsmall_epoch4.adj/wer_18_0.0 + # Results with nnet3 tdnn+chain model # local/chain/run_tdnn_6z.sh # (4 epoch training on speed-perturbed data) @@ -463,3 +549,89 @@ %WER 11.20 [ 5864 / 52343, 619 ins, 781 del, 4464 sub ] exp/chain/tdnn_6z_sp/decode_test_other_tglarge/wer_13_0.5 %WER 13.47 [ 7051 / 52343, 733 ins, 933 del, 5385 sub ] exp/chain/tdnn_6z_sp/decode_test_other_tgmed/wer_13_0.0 %WER 14.73 [ 7710 / 52343, 662 ins, 1209 del, 5839 sub ] exp/chain/tdnn_6z_sp/decode_test_other_tgsmall/wer_14_0.0 + +# Results with nnet3 tdnn+chain+sMBR +# local/chain/run_tdnn_6z_discriminative.sh +# a subset of the full list of results (using the acoustic model obtained at the end of the training): +%WER 3.92 [ 2133 / 54402, 320 ins, 170 del, 1643 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_fglarge_epoch12/wer_11_0.5 +%WER 4.03 [ 2193 / 54402, 288 ins, 213 del, 1692 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tglarge_epoch12/wer_11_1.0 +%WER 5.21 [ 2835 / 54402, 306 ins, 329 del, 2200 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgmed_epoch12/wer_11_0.5 +%WER 5.76 [ 3132 / 54402, 370 ins, 311 del, 2451 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgsmall_epoch12/wer_12_0.0 +%WER 10.41 [ 5304 / 50948, 536 ins, 695 del, 4073 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_fglarge_epoch12/wer_14_1.0 +%WER 10.94 [ 5576 / 50948, 665 ins, 650 del, 4261 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tglarge_epoch12/wer_14_0.5 +%WER 13.74 [ 7002 / 50948, 733 ins, 895 del, 5374 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgmed_epoch12/wer_12_0.0 +%WER 14.53 [ 7405 / 50948, 713 ins, 1033 del, 5659 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgsmall_epoch12/wer_15_0.0 +%WER 4.31 [ 2268 / 52576, 326 ins, 208 del, 1734 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_fglarge_epoch12/wer_11_1.0 +%WER 4.51 [ 2371 / 52576, 345 ins, 218 del, 1808 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tglarge_epoch12/wer_13_0.5 +%WER 5.63 [ 2959 / 52576, 402 ins, 297 del, 2260 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgmed_epoch12/wer_11_0.0 +%WER 6.15 [ 3234 / 52576, 388 ins, 365 del, 2481 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgsmall_epoch12/wer_11_0.5 +%WER 10.62 [ 5558 / 52343, 668 ins, 602 del, 4288 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_fglarge_epoch12/wer_13_0.5 +%WER 11.06 [ 5788 / 52343, 580 ins, 770 del, 4438 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tglarge_epoch12/wer_13_1.0 +%WER 13.66 [ 7151 / 52343, 755 ins, 932 del, 5464 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgmed_epoch12/wer_12_0.0 +%WER 14.75 [ 7721 / 52343, 895 ins, 841 del, 5985 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgsmall_epoch12/wer_12_0.0 + +# the full list of results +%WER 3.92 [ 2133 / 54402, 320 ins, 170 del, 1643 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_fglarge_epoch12/wer_11_0.5 +%WER 3.91 [ 2125 / 54402, 279 ins, 217 del, 1629 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_fglarge_epoch3/wer_11_1.0 +%WER 3.89 [ 2116 / 54402, 282 ins, 209 del, 1625 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_fglarge_epoch6/wer_11_1.0 +%WER 3.90 [ 2122 / 54402, 281 ins, 205 del, 1636 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_fglarge_epoch9/wer_11_1.0 +%WER 4.03 [ 2193 / 54402, 288 ins, 213 del, 1692 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tglarge_epoch12/wer_11_1.0 +%WER 4.03 [ 2194 / 54402, 267 ins, 239 del, 1688 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tglarge_epoch3/wer_12_1.0 +%WER 4.04 [ 2196 / 54402, 312 ins, 196 del, 1688 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tglarge_epoch6/wer_12_0.5 +%WER 4.03 [ 2190 / 54402, 284 ins, 213 del, 1693 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tglarge_epoch9/wer_11_1.0 +%WER 5.21 [ 2835 / 54402, 306 ins, 329 del, 2200 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgmed_epoch12/wer_11_0.5 +%WER 5.16 [ 2808 / 54402, 295 ins, 338 del, 2175 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgmed_epoch3/wer_11_0.5 +%WER 5.20 [ 2831 / 54402, 375 ins, 260 del, 2196 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgmed_epoch6/wer_10_0.0 +%WER 5.23 [ 2843 / 54402, 382 ins, 256 del, 2205 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgmed_epoch9/wer_10_0.0 +%WER 5.76 [ 3132 / 54402, 370 ins, 311 del, 2451 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgsmall_epoch12/wer_12_0.0 +%WER 5.77 [ 3140 / 54402, 393 ins, 294 del, 2453 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgsmall_epoch3/wer_11_0.0 +%WER 5.79 [ 3149 / 54402, 373 ins, 315 del, 2461 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgsmall_epoch6/wer_12_0.0 +%WER 5.76 [ 3135 / 54402, 354 ins, 330 del, 2451 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_clean_tgsmall_epoch9/wer_13_0.0 +%WER 10.41 [ 5304 / 50948, 536 ins, 695 del, 4073 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_fglarge_epoch12/wer_14_1.0 +%WER 10.36 [ 5280 / 50948, 591 ins, 637 del, 4052 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_fglarge_epoch3/wer_15_0.5 +%WER 10.42 [ 5309 / 50948, 608 ins, 622 del, 4079 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_fglarge_epoch6/wer_15_0.5 +%WER 10.40 [ 5297 / 50948, 615 ins, 607 del, 4075 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_fglarge_epoch9/wer_15_0.5 +%WER 10.94 [ 5576 / 50948, 665 ins, 650 del, 4261 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tglarge_epoch12/wer_14_0.5 +%WER 10.91 [ 5556 / 50948, 643 ins, 667 del, 4246 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tglarge_epoch3/wer_14_0.5 +%WER 10.90 [ 5551 / 50948, 646 ins, 652 del, 4253 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tglarge_epoch6/wer_14_0.5 +%WER 10.92 [ 5561 / 50948, 664 ins, 644 del, 4253 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tglarge_epoch9/wer_14_0.5 +%WER 13.74 [ 7002 / 50948, 733 ins, 895 del, 5374 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgmed_epoch12/wer_12_0.0 +%WER 13.64 [ 6949 / 50948, 658 ins, 1027 del, 5264 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgmed_epoch3/wer_13_0.0 +%WER 13.66 [ 6958 / 50948, 673 ins, 1005 del, 5280 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgmed_epoch6/wer_13_0.0 +%WER 13.68 [ 6970 / 50948, 685 ins, 998 del, 5287 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgmed_epoch9/wer_13_0.0 +%WER 14.53 [ 7405 / 50948, 713 ins, 1033 del, 5659 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgsmall_epoch12/wer_15_0.0 +%WER 14.44 [ 7355 / 50948, 716 ins, 974 del, 5665 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgsmall_epoch3/wer_14_0.0 +%WER 14.40 [ 7339 / 50948, 730 ins, 941 del, 5668 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgsmall_epoch6/wer_14_0.0 +%WER 14.50 [ 7388 / 50948, 746 ins, 946 del, 5696 sub ] exp/chain/tdnn_6z_sp_smbr/decode_dev_other_tgsmall_epoch9/wer_14_0.0 +%WER 4.31 [ 2268 / 52576, 326 ins, 208 del, 1734 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_fglarge_epoch12/wer_11_1.0 +%WER 4.29 [ 2257 / 52576, 315 ins, 215 del, 1727 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_fglarge_epoch3/wer_11_1.0 +%WER 4.30 [ 2260 / 52576, 320 ins, 213 del, 1727 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_fglarge_epoch6/wer_11_1.0 +%WER 4.29 [ 2258 / 52576, 324 ins, 210 del, 1724 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_fglarge_epoch9/wer_11_1.0 +%WER 4.51 [ 2371 / 52576, 345 ins, 218 del, 1808 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tglarge_epoch12/wer_13_0.5 +%WER 4.48 [ 2357 / 52576, 366 ins, 203 del, 1788 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tglarge_epoch3/wer_11_0.5 +%WER 4.47 [ 2349 / 52576, 363 ins, 208 del, 1778 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tglarge_epoch6/wer_12_0.5 +%WER 4.50 [ 2365 / 52576, 362 ins, 208 del, 1795 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tglarge_epoch9/wer_12_0.5 +%WER 5.63 [ 2959 / 52576, 402 ins, 297 del, 2260 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgmed_epoch12/wer_11_0.0 +%WER 5.64 [ 2965 / 52576, 402 ins, 310 del, 2253 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgmed_epoch3/wer_11_0.0 +%WER 5.61 [ 2947 / 52576, 397 ins, 296 del, 2254 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgmed_epoch6/wer_11_0.0 +%WER 5.62 [ 2953 / 52576, 391 ins, 323 del, 2239 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgmed_epoch9/wer_12_0.0 +%WER 6.15 [ 3234 / 52576, 388 ins, 365 del, 2481 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgsmall_epoch12/wer_11_0.5 +%WER 6.10 [ 3206 / 52576, 430 ins, 316 del, 2460 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgsmall_epoch3/wer_11_0.0 +%WER 6.13 [ 3223 / 52576, 422 ins, 326 del, 2475 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgsmall_epoch6/wer_12_0.0 +%WER 6.15 [ 3236 / 52576, 429 ins, 322 del, 2485 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_clean_tgsmall_epoch9/wer_12_0.0 +%WER 10.62 [ 5558 / 52343, 668 ins, 602 del, 4288 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_fglarge_epoch12/wer_13_0.5 +%WER 10.57 [ 5532 / 52343, 659 ins, 625 del, 4248 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_fglarge_epoch3/wer_13_0.5 +%WER 10.58 [ 5540 / 52343, 666 ins, 615 del, 4259 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_fglarge_epoch6/wer_13_0.5 +%WER 10.64 [ 5571 / 52343, 666 ins, 609 del, 4296 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_fglarge_epoch9/wer_13_0.5 +%WER 11.06 [ 5788 / 52343, 580 ins, 770 del, 4438 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tglarge_epoch12/wer_13_1.0 +%WER 11.11 [ 5813 / 52343, 563 ins, 812 del, 4438 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tglarge_epoch3/wer_13_1.0 +%WER 11.07 [ 5793 / 52343, 575 ins, 787 del, 4431 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tglarge_epoch6/wer_13_1.0 +%WER 11.03 [ 5776 / 52343, 577 ins, 774 del, 4425 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tglarge_epoch9/wer_13_1.0 +%WER 13.66 [ 7151 / 52343, 755 ins, 932 del, 5464 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgmed_epoch12/wer_12_0.0 +%WER 13.60 [ 7120 / 52343, 661 ins, 1077 del, 5382 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgmed_epoch3/wer_13_0.0 +%WER 13.68 [ 7158 / 52343, 737 ins, 962 del, 5459 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgmed_epoch6/wer_12_0.0 +%WER 13.64 [ 7141 / 52343, 742 ins, 950 del, 5449 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgmed_epoch9/wer_12_0.0 +%WER 14.75 [ 7721 / 52343, 895 ins, 841 del, 5985 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgsmall_epoch12/wer_12_0.0 +%WER 14.64 [ 7664 / 52343, 818 ins, 956 del, 5890 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgsmall_epoch3/wer_13_0.0 +%WER 14.70 [ 7696 / 52343, 835 ins, 945 del, 5916 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgsmall_epoch6/wer_13_0.0 +%WER 14.75 [ 7722 / 52343, 892 ins, 849 del, 5981 sub ] exp/chain/tdnn_6z_sp_smbr/decode_test_other_tgsmall_epoch9/wer_12_0.0 diff --git a/egs/librispeech/s5/local/chain/run_tdnn_6z_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_6z_discriminative.sh index 944cfe255da..f1fc30acd3c 100755 --- a/egs/librispeech/s5/local/chain/run_tdnn_6z_discriminative.sh +++ b/egs/librispeech/s5/local/chain/run_tdnn_6z_discriminative.sh @@ -45,14 +45,12 @@ frames_overlap_per_eg=30 truncate_deriv_weights=10 ## Nnet training options -effective_learning_rate=0.00000125 +effective_learning_rate=0.000000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -modify_learning_rates=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -164,7 +162,7 @@ if [ -z "$degs_dir" ]; then if [ $stage -le 3 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then utils/create_split_dir.pl \ - /export/b{01,02,12,13}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage + /export/b{01,02,12,13}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage fi # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi @@ -192,17 +190,16 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ ${degs_dir} $dir ; fi -graph_dir=$srcdir/graph_tgsmall +graph_dir=${srcdir}/graph_test_tgsmall if [ $stage -le 5 ]; then for x in `seq $decode_start_epoch $num_epochs`; do for decode_set in test_clean test_other dev_clean dev_other; do ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + iter=epoch$[x*frame_subsampling_factor] steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh index 70a21609756..986efa4d4bd 100755 --- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh @@ -22,7 +22,7 @@ cleanup=false # run with --cleanup true --stage 6 to clean up (remove large thi . ./path.sh . ./utils/parse_options.sh -srcdir=exp/nnet3/tdnn +srcdir=exp/nnet3/tdnn_sp train_data_dir=data/train_960_sp_hires online_ivector_dir=exp/nnet3/ivectors_train_960_sp degs_dir= # If provided, will skip the degs directory creation @@ -40,7 +40,7 @@ frames_overlap_per_eg=30 truncate_deriv_weights=10 ## Nnet training options -effective_learning_rate=0.0000125 +effective_learning_rate=0.00000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 @@ -123,7 +123,7 @@ if [ -z "$degs_dir" ]; then if [ $stage -le 3 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then utils/create_split_dir.pl \ - /export/b{01,02,12,13}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage + /export/b{01,02,12,13}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage fi # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi @@ -155,7 +155,7 @@ if [ $stage -le 4 ]; then ${degs_dir} $dir fi -graph_dir=$srcdir/graph_tgsmall +graph_dir=exp/tri6b/graph_tgsmall if [ $stage -le 5 ]; then for x in `seq $decode_start_epoch $num_epochs`; do for decode_set in test_clean test_other dev_clean dev_other; do diff --git a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh index daf9906c6cd..7ace493aeda 100755 --- a/egs/wsj/s5/steps/nnet3/align.sh +++ b/egs/wsj/s5/steps/nnet3/align.sh @@ -124,7 +124,7 @@ ivector_opts= if [ ! -z "$online_ivector_dir" ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; # note: subsample-feats, with negative n, will repeat each feature -n times. - ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector_period=$ivector_period" + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" fi echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"