danpovey · xiaohui-zhang · Jul 25, 2018 · Jul 26, 2018 · Jul 26, 2018 · Jul 26, 2018
diff --git a/egs/aishell/s5/local/aishell_train_lms.sh b/egs/aishell/s5/local/aishell_train_lms.sh
@@ -23,7 +23,7 @@ kaldi_lm=`which train_lm.sh`
 if [ -z $kaldi_lm ]; then
   echo "$0: train_lm.sh is not found. That might mean it's not installed"
   echo "$0: or it is not added to PATH"
-  echo "$0: Use the script tools/extra/install_kaldi_lm.sh to install it"
+  echo "$0: Use the script tools/extras/install_kaldi_lm.sh to install it"
   exit 1
 fi
 

diff --git a/egs/aishell2/s5/local/train_lms.sh b/egs/aishell2/s5/local/train_lms.sh
@@ -24,7 +24,7 @@ kaldi_lm=`which train_lm.sh`
 if [ -z $kaldi_lm ]; then
   echo "$0: train_lm.sh is not found. That might mean it's not installed"
   echo "$0: or it is not added to PATH"
-  echo "$0: Use the script tools/extra/install_kaldi_lm.sh to install it"
+  echo "$0: Use the script tools/extras/install_kaldi_lm.sh to install it"
   exit 1
 fi
 

diff --git a/egs/callhome_diarization/v1/local/make_swbd2_phase1.pl b/egs/callhome_diarization/v1/local/make_swbd2_phase1.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/perl
+use warnings; #sed replacement for -w perl parameter
+#
+# Copyright   2017   David Snyder
+# Apache 2.0
+
+# Require exactly two arguments: the corpus directory and the output directory.
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-LDC98S75> <path-to-output>\n";
+  print STDERR "e.g. $0 /export/corpora3/LDC/LDC98S75 data/swbd2_phase1_train\n";
+  exit(1);
+}
+($db_base, $out_dir) = @ARGV;
+$tmp_dir = "$out_dir/tmp";
+@badAudio = ("3", "4");  # ids the callstat.tbl loop treats as bad audio
+# List-form system() skips the shell, so a path containing spaces or shell
+# metacharacters reaches mkdir unchanged.
+foreach my $dir ($out_dir, $tmp_dir) {
+  system("mkdir", "-p", $dir) == 0 or die "Error making directory $dir";
+}
+
+# Three-argument open keeps the mode apart from the file name, and $! adds
+# the operating-system reason to each failure message.
+open(CS, "<", "$db_base/doc/callstat.tbl") || die "Could not open $db_base/doc/callstat.tbl: $!";
+open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender: $!";
+open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk: $!";
+open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp: $!";
+
+# The shell is still needed here for the output redirection.
+if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
+  die "Error getting list of sph files";
+}
+
+# Index each .sph path listed in sph.list by its basename up to the first
+# '.'.  Three-argument open keeps the mode apart from the path.
+open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list $tmp_dir/sph.list: $!";
+%wavs = ();
+while (my $path = <WAVLIST>) {
+  chomp $path;
+  my $basename = (split("/", $path))[-1];
+  my $key = (split("[./]", $basename))[0];
+  $wavs{$key} = $path;
+}
+close(WAVLIST);
+
+# Read callstat.tbl line by line (fields are comma-separated) and, for every
+# row whose audio was found under $db_base, write one wav.scp/utt2spk entry
+# per channel plus a spk2gender entry the first time a speaker is seen.
+my %gender_code = ("M" => "m", "F" => "f");
+while (<CS>) {
+  $line = $_ ;
+  @A = split(",", $line);
+  # Field 0 up to its first '.' or '/' is the key used to look up the audio.
+  @A1 = split("[./]",$A[0]);
+  $wav = $A1[0];
+  # Test the id itself for membership in @badAudio.  The former smartmatch
+  # compared the true/false result of /$wav/i against the list, which can
+  # never equal "3" or "4", and ~~ is an experimental operator that newer
+  # perls warn about.  NOTE(review): assumes membership was the intent.
+  if (grep { $_ eq $wav } @badAudio) {
+    print "Bad Audio = $wav";
+    next;
+  }
+
+  $spkr1= "sw_" . $A[2];
+  $spkr2= "sw_" . $A[3];
+  # Genders arrive as "M"/"F"; anything else aborts the run.
+  foreach my $code ($A[5], $A[6]) {
+    die "Unknown Gender in $line" unless exists $gender_code{$code};
+  }
+  $gender1 = $gender_code{$A[5]};
+  $gender2 = $gender_code{$A[6]};
+
+  # Look the path up without interpolating a possibly missing hash entry,
+  # which would raise an "uninitialized value" warning.
+  my $sph_path = exists $wavs{$wav} ? $wavs{$wav} : "";
+  if (! -e $sph_path) {
+    print STDERR "Missing $sph_path for $wav\n";
+    next;
+  }
+
+  # Channel 1 belongs to the first speaker, channel 2 to the second.
+  foreach my $side ([$spkr1, $gender1, 1], [$spkr2, $gender2, 2]) {
+    my ($spkr, $gender, $chan) = @$side;
+    $uttId = $spkr . "_" . $wav . "_" . $chan;
+    if (!$spk2gender{$spkr}) {
+      $spk2gender{$spkr} = $gender;
+      print GNDR "$spkr $gender\n";
+    }
+    print WAV "$uttId sph2pipe -f wav -p -c $chan $sph_path |\n";
+    print SPKR "$uttId $spkr\n";
+  }
+}
+
+# Close the outputs, then run the utils/ steps; stop at the first failure.
+close(WAV) or die;
+close(SPKR) or die;
+close(GNDR) or die;
+foreach my $step (
+  ["utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt", "Error creating spk2utt file in directory $out_dir"],
+  ["utils/fix_data_dir.sh $out_dir", "Error fixing data dir $out_dir"],
+  ["utils/validate_data_dir.sh --no-text --no-feats $out_dir", "Error validating directory $out_dir"],
+) {
+  my ($command, $failure) = @$step;
+  die $failure unless system($command) == 0;
+}
diff --git a/egs/heroico/s5/RESULTS b/egs/heroico/s5/RESULTS
@@ -1,22 +1,48 @@
 # for dir in $(echo exp/tri*/decode* | grep -v 'si/'); do grep WER $dir/wer* | utils/best_wer.sh; done
 
-%WER 67.01 [ 5126 / 7650, 837 ins, 575 del, 3714 sub ] exp/tri1/decode_devtest/wer_14_1.0
-%WER 62.39 [ 4678 / 7498, 768 ins, 397 del, 3513 sub ] exp/tri1/decode_native/wer_13_1.0
-%WER 67.05 [ 6179 / 9215, 895 ins, 606 del, 4678 sub ] exp/tri1/decode_nonnative/wer_13_1.0
-%WER 64.97 [ 10859 / 16713, 1678 ins, 999 del, 8182 sub ] exp/tri1/decode_test/wer_13_1.0
-%WER 65.90 [ 5041 / 7650, 1016 ins, 416 del, 3609 sub ] exp/tri2b/decode_devtest/wer_12_1.0
-%WER 61.26 [ 4593 / 7498, 908 ins, 300 del, 3385 sub ] exp/tri2b/decode_native/wer_14_1.0
-%WER 67.51 [ 6221 / 9215, 1085 ins, 524 del, 4612 sub ] exp/tri2b/decode_nonnative/wer_14_1.0
-%WER 64.87 [ 10842 / 16713, 2004 ins, 838 del, 8000 sub ] exp/tri2b/decode_test/wer_14_1.0
-%WER 66.09 [ 5056 / 7650, 1078 ins, 402 del, 3576 sub ] exp/tri3b/decode_devtest/wer_16_1.0
-%WER 74.88 [ 5728 / 7650, 1210 ins, 426 del, 4092 sub ] exp/tri3b/decode_devtest.si/wer_15_1.0
-%WER 61.19 [ 4588 / 7498, 1038 ins, 255 del, 3295 sub ] exp/tri3b/decode_native/wer_14_1.0
-%WER 70.99 [ 5323 / 7498, 1185 ins, 301 del, 3837 sub ] exp/tri3b/decode_native.si/wer_16_1.0
-%WER 66.35 [ 6114 / 9215, 1186 ins, 421 del, 4507 sub ] exp/tri3b/decode_nonnative/wer_17_1.0
-%WER 76.36 [ 7037 / 9215, 1420 ins, 467 del, 5150 sub ] exp/tri3b/decode_nonnative.si/wer_16_1.0
-%WER 64.06 [ 10706 / 16713, 2245 ins, 657 del, 7804 sub ] exp/tri3b/decode_test/wer_15_1.0
-%WER 73.97 [ 12362 / 16713, 2608 ins, 766 del, 8988 sub ] exp/tri3b/decode_test.si/wer_16_1.0
-%WER 53.07 [ 4060 / 7650, 744 ins, 376 del, 2940 sub ] exp/chain/tdnn1e_sp/decode_devtest/wer_7_1.0
-%WER 54.47 [ 4084 / 7498, 536 ins, 475 del, 3073 sub ] exp/chain/tdnn1e_sp/decode_native/wer_7_1.0
-%WER 63.01 [ 5806 / 9215, 685 ins, 784 del, 4337 sub ] exp/chain/tdnn1e_sp/decode_nonnative/wer_7_1.0
-%WER 59.25 [ 9903 / 16713, 1226 ins, 1259 del, 7418 sub ] exp/chain/tdnn1e_sp/decode_test/wer_7_1.0
+# old results before adding Movie subtitles text corpus in LM training:
+# %WER 67.01 [ 5126 / 7650, 837 ins, 575 del, 3714 sub ] exp/tri1/decode_devtest/wer_14_1.0
+# %WER 62.39 [ 4678 / 7498, 768 ins, 397 del, 3513 sub ] exp/tri1/decode_native/wer_13_1.0
+# %WER 67.05 [ 6179 / 9215, 895 ins, 606 del, 4678 sub ] exp/tri1/decode_nonnative/wer_13_1.0
+# %WER 64.97 [ 10859 / 16713, 1678 ins, 999 del, 8182 sub ] exp/tri1/decode_test/wer_13_1.0
+# %WER 65.90 [ 5041 / 7650, 1016 ins, 416 del, 3609 sub ] exp/tri2b/decode_devtest/wer_12_1.0
+# %WER 61.26 [ 4593 / 7498, 908 ins, 300 del, 3385 sub ] exp/tri2b/decode_native/wer_14_1.0
+# %WER 67.51 [ 6221 / 9215, 1085 ins, 524 del, 4612 sub ] exp/tri2b/decode_nonnative/wer_14_1.0
+# %WER 64.87 [ 10842 / 16713, 2004 ins, 838 del, 8000 sub ] exp/tri2b/decode_test/wer_14_1.0
+# %WER 66.09 [ 5056 / 7650, 1078 ins, 402 del, 3576 sub ] exp/tri3b/decode_devtest/wer_16_1.0
+# %WER 74.88 [ 5728 / 7650, 1210 ins, 426 del, 4092 sub ] exp/tri3b/decode_devtest.si/wer_15_1.0
+# %WER 61.19 [ 4588 / 7498, 1038 ins, 255 del, 3295 sub ] exp/tri3b/decode_native/wer_14_1.0
+# %WER 70.99 [ 5323 / 7498, 1185 ins, 301 del, 3837 sub ] exp/tri3b/decode_native.si/wer_16_1.0
+# %WER 66.35 [ 6114 / 9215, 1186 ins, 421 del, 4507 sub ] exp/tri3b/decode_nonnative/wer_17_1.0
+# %WER 76.36 [ 7037 / 9215, 1420 ins, 467 del, 5150 sub ] exp/tri3b/decode_nonnative.si/wer_16_1.0
+# %WER 64.06 [ 10706 / 16713, 2245 ins, 657 del, 7804 sub ] exp/tri3b/decode_test/wer_15_1.0
+# %WER 73.97 [ 12362 / 16713, 2608 ins, 766 del, 8988 sub ] exp/tri3b/decode_test.si/wer_16_1.0
+# %WER 53.07 [ 4060 / 7650, 744 ins, 376 del, 2940 sub ] exp/chain/tdnn1e_sp/decode_devtest/wer_7_1.0
+# %WER 54.47 [ 4084 / 7498, 536 ins, 475 del, 3073 sub ] exp/chain/tdnn1e_sp/decode_native/wer_7_1.0
+# %WER 63.01 [ 5806 / 9215, 685 ins, 784 del, 4337 sub ] exp/chain/tdnn1e_sp/decode_nonnative/wer_7_1.0
+# %WER 59.25 [ 9903 / 16713, 1226 ins, 1259 del, 7418 sub ] exp/chain/tdnn1e_sp/decode_test/wer_7_1.0
+
+# new results:
+%WER 18.27 [ 1398 / 7650, 213 ins, 253 del, 932 sub ] exp/tri1/decode_devtest/wer_15_0.5
+%WER 9.95 [ 746 / 7498, 74 ins, 108 del, 564 sub ] exp/tri1/decode_native/wer_13_0.5
+%WER 16.63 [ 1532 / 9215, 197 ins, 183 del, 1152 sub ] exp/tri1/decode_nonnative/wer_17_0.0
+%WER 13.68 [ 2287 / 16713, 207 ins, 360 del, 1720 sub ] exp/tri1/decode_test/wer_17_0.5
+%WER 17.19 [ 1315 / 7650, 227 ins, 231 del, 857 sub ] exp/tri2b/decode_devtest/wer_17_0.5
+%WER 9.23 [ 692 / 7498, 60 ins, 103 del, 529 sub ] exp/tri2b/decode_native/wer_16_0.5
+%WER 17.16 [ 1581 / 9215, 184 ins, 216 del, 1181 sub ] exp/tri2b/decode_nonnative/wer_17_0.5
+%WER 13.64 [ 2279 / 16713, 241 ins, 326 del, 1712 sub ] exp/tri2b/decode_test/wer_17_0.5
+%WER 15.36 [ 1175 / 7650, 212 ins, 210 del, 753 sub ] exp/tri3b/decode_devtest/wer_17_0.5
+%WER 20.27 [ 1551 / 7650, 269 ins, 257 del, 1025 sub ] exp/tri3b/decode_devtest.si/wer_14_1.0
+%WER 6.40 [ 480 / 7498, 50 ins, 58 del, 372 sub ] exp/tri3b/decode_native/wer_16_0.0
+%WER 10.91 [ 818 / 7498, 100 ins, 112 del, 606 sub ] exp/tri3b/decode_native.si/wer_16_1.0
+%WER 14.30 [ 1318 / 9215, 206 ins, 134 del, 978 sub ] exp/tri3b/decode_nonnative/wer_17_0.0
+%WER 21.62 [ 1992 / 9215, 286 ins, 224 del, 1482 sub ] exp/tri3b/decode_nonnative.si/wer_16_1.0
+%WER 10.78 [ 1802 / 16713, 247 ins, 195 del, 1360 sub ] exp/tri3b/decode_test/wer_17_0.0
+%WER 16.81 [ 2809 / 16713, 374 ins, 338 del, 2097 sub ] exp/tri3b/decode_test.si/wer_16_1.0
+
+# chain model results:
+# for dir in $(echo exp/chain/tdnn1b_sp/decode* | grep -v 'si/'); do grep WER $dir/wer* | utils/best_wer.sh; done
+%WER 12.99 [ 994 / 7650, 192 ins, 163 del, 639 sub ] exp/chain/tdnn1b_sp/decode_devtest/wer_10_1.0
+%WER 12.47 [ 1149 / 9215, 119 ins, 174 del, 856 sub ] exp/chain/tdnn1b_sp/decode_nonnative/wer_12_0.0
+%WER 9.64 [ 1611 / 16713, 169 ins, 240 del, 1202 sub ] exp/chain/tdnn1b_sp/decode_test/wer_12_0.0
+%WER 6.13 [ 460 / 7498, 52 ins, 55 del, 353 sub ] exp/chain/tdnn1b_sp/decode_native/wer_10_0.0
diff --git a/...5/local/chain/tuning/run_cnn_tdnn_1a10.sh → .../s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/...5/local/chain/tuning/run_cnn_tdnn_1a10.sh → .../s5/local/chain/tuning/run_cnn_tdnn_1a.sh
@@ -1,13 +1,11 @@
 #!/bin/bash
 
-
-# run_cnn_tdnn_1a10.sh is modified from run_tdnn_1b.sh but taking
+# run_cnn_tdnn_1a.sh is modified from run_tdnn_1b.sh but taking
 #   the xconfig from mini-librispeech's run_cnn_tdnn_1a54.sh; only
 #   reducing the bottleneck-dim from 96 to 64, which is the value
-#   the run_tdnn1b.sh script here has.
-# Better!
-# local/chain/compare_wer.sh exp/chain/tdnn1a_sp exp/chain/tdnn1b_sp exp/chain/cnn_tdnn1a10_sp
-# System                  tdnn1a_sp tdnn1b_sp cnn_tdnn1a10_sp
+#   the run_tdnn1b.sh script here has. Results are better.
+# local/chain/compare_wer.sh exp/chain/tdnn1a_sp exp/chain/tdnn1b_sp exp/chain/cnn_tdnn1a_sp
+# System                  tdnn1a_sp tdnn1b_sp cnn_tdnn1a_sp
 # %WER        devtest       53.07     52.54     51.10
 # %WER           test       59.25     53.70     52.07
 # %WER         native       54.47     48.76     47.88
@@ -18,27 +16,6 @@
 # Final valid prob (xent)   -1.0719   -1.0849   -0.9915
 # Num-params                 6567648   3321312   3345088
 
-
-
-# 1b is as 1a but a re-tuned model with quite a few changes, including moving to
-#   a resnet-style factored TDNN-F model.
-#
-# local/chain/compare_wer.sh exp/chain/tdnn1a_sp exp/chain/tdnn1b_sp
-# System                  tdnn1a_sp tdnn1b_sp
-# %WER        devtest       53.07     52.54
-# %WER           test       59.25     53.70
-# %WER         native       54.47     48.76
-# %WER      nonnative       63.01     57.66
-# Final train prob          -0.0253   -0.0547
-# Final valid prob          -0.0687   -0.0694
-# Final train prob (xent)   -0.7715   -0.9502
-# Final valid prob (xent)   -1.0719   -1.0849
-# Num-params                 6567648   3321312
-
-
-# steps/info/chain_dir_info.pl  exp/chain/tdnn1b_sp
-# exp/chain/tdnn1b_sp: num-iters=34 nj=2..5 num-params=3.3M dim=40+100->1392 combine=-0.059->-0.059 (over 1) xent:train/valid[21,33,final]=(-1.28,-0.986,-0.950/-1.38,-1.10,-1.08) logprob:train/valid[21,33,final]=(-0.085,-0.063,-0.055/-0.090,-0.074,-0.069)
-
 # Set -e here so that we catch if any executable fails immediately
 set -euo pipefail
 
@@ -53,7 +30,7 @@ nnet3_affix=
 
 # The rest are configs specific to this script.  Most of the parameters
 # are just hardcoded at this level, in the commands below.
-affix=1a10   # affix for the TDNN directory name
+affix=1a   # affix for the TDNN directory name
 tree_affix=
 train_stage=-10
 get_egs_stage=-10

diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -1,19 +1,20 @@
 #!/bin/bash
 
 # local/chain/compare_wer.sh exp/chain/tdnn1a_sp
+# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp
 # System                  tdnn1a_sp
-# %WER        devtest       53.07
-# %WER           test       59.25
-# %WER         native       54.47
-# %WER      nonnative       63.01
-# Final train prob          -0.0253
-# Final valid prob          -0.0687
-# Final train prob (xent)   -0.7715
-# Final valid prob (xent)   -1.0719
-# Num-params                 6567648
+# %WER        devtest       13.10
+# %WER           test       15.53
+# %WER         native       10.14
+# %WER      nonnative       19.78
+# Final train prob          -0.0233
+# Final valid prob          -0.0720
+# Final train prob (xent)   -0.8107
+# Final valid prob (xent)   -0.9898
+# Num-params                 6559440
 
 # steps/info/chain_dir_info.pl  exp/chain/tdnn1a_sp/
-#exp/chain/tdnn1a_sp/: num-iters=105 nj=1..1 num-params=6.6M dim=40+100->1392 combine=-0.040->-0.033 (over 7) xent:train/valid[69,104,final]=(-1.12,-0.880,-0.771/-1.33,-1.21,-1.07) logprob:train/valid[69,104,final]=(-0.050,-0.031,-0.025/-0.079,-0.080,-0.069)
+# exp/chain/tdnn1a_sp: num-iters=105 nj=1..1 num-params=6.6M dim=40+100->1384 combine=-0.032->-0.026 (over 7) xent:train/valid[69,104,final]=(-1.14,-0.892,-0.811/-1.19,-1.07,-0.990) logprob:train/valid[69,104,final]=(-0.045,-0.029,-0.023/-0.083,-0.080,-0.072)
 
 # Set -e here so that we catch if any executable fails immediately
 set -euo pipefail

diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -3,21 +3,20 @@
 # 1b is as 1a but a re-tuned model with quite a few changes, including moving to
 #   a resnet-style factored TDNN-F model.
 #
-# local/chain/compare_wer.sh exp/chain/tdnn1a_sp exp/chain/tdnn1b_sp
+# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp exp/chain/tdnn1b_sp
 # System                  tdnn1a_sp tdnn1b_sp
-# %WER        devtest       53.07     52.54
-# %WER           test       59.25     53.70
-# %WER         native       54.47     48.76
-# %WER      nonnative       63.01     57.66
-# Final train prob          -0.0253   -0.0547
-# Final valid prob          -0.0687   -0.0694
-# Final train prob (xent)   -0.7715   -0.9502
-# Final valid prob (xent)   -1.0719   -1.0849
-# Num-params                 6567648   3321312
-
+# %WER        devtest       13.10     12.99
+# %WER           test       15.53      9.64
+# %WER         native       10.14      6.13
+# %WER      nonnative       19.78     12.47
+# Final train prob          -0.0233   -0.0442
+# Final valid prob          -0.0720   -0.0726
+# Final train prob (xent)   -0.8107   -0.9759
+# Final valid prob (xent)   -0.9898   -0.9964
+# Num-params                 6559440   3318224
 
 # steps/info/chain_dir_info.pl  exp/chain/tdnn1b_sp
-# exp/chain/tdnn1b_sp: num-iters=34 nj=2..5 num-params=3.3M dim=40+100->1392 combine=-0.059->-0.059 (over 1) xent:train/valid[21,33,final]=(-1.28,-0.986,-0.950/-1.38,-1.10,-1.08) logprob:train/valid[21,33,final]=(-0.085,-0.063,-0.055/-0.090,-0.074,-0.069)
+# exp/chain/tdnn1b_sp: num-iters=34 nj=2..5 num-params=3.3M dim=40+100->1384 combine=-0.044->-0.044 (over 1) xent:train/valid[21,33,final]=(-1.30,-0.993,-0.976/-1.28,-1.01,-0.996) logprob:train/valid[21,33,final]=(-0.071,-0.050,-0.044/-0.093,-0.076,-0.073)
 
 # Set -e here so that we catch if any executable fails immediately
 set -euo pipefail

diff --git a/egs/heroico/s5/local/heroico_answers_make_lists.pl b/egs/heroico/s5/local/heroico_answers_make_lists.pl
@@ -30,7 +30,7 @@
 my $t = "$tmpdir/answers/text";
 
 # initialize hash for prompts
-my %p = ();
+my %prompts = ();
 
 # store prompts in hash
 LINEA: while ( my $line = <> ) {
@@ -40,9 +40,27 @@
   my @dirs = split /\//, $directories;
   # get the speaker number
   my $s = $dirs[-1];
+  # pad the speaker number with zeroes
+  my $spk = "";
+  if ( $s < 10 ) {
+      $spk = '000' . $s;
+  } elsif ( $s < 100 ) {
+      $spk = '00' . $s;
+  } elsif ( $s < 1000 ) {
+      $spk = '0' . $s;
+  }
+  # pad the filename with zeroes
+  my $fn = "";
+  if ( $file < 10 ) {
+      $fn = '000' . $file;
+  } elsif ( $file < 100 ) {
+      $fn = '00' . $file;
+  } elsif ( $file < 1000 ) {
+      $fn = '0' . $file;
+  }
   # the utterance name
-  my $i = $s . '_' . 'a' . '_' . $file;
-  $p{$i} = $sent;
+  my $utt = $spk . '_' . $fn;
+  $prompts{$utt} = $sent;
 }
 
 open my $W, '<', $w or croak "problem with $w $!";
@@ -58,18 +76,36 @@
   my @dirs = split /\//, $directories;
   my $r = basename $line, ".wav";
   my $s = $dirs[-1];
-  my $rid = $s . '_' . 'a' . '_' . $r;
-  if ( exists $p{$rid} ) {
-    print $T "$rid $p{$rid}\n";
-  } elsif ( defined $rid ) {
-    warn  "warning: problem\t$rid";
+  my $spk = "";
+  # pad with zeroes
+  if ( $s < 10 ) {
+      $spk = '000' . $s;
+  } elsif ( $s < 100 ) {
+      $spk = '00' . $s;
+  } elsif ( $s < 1000 ) {
+      $spk = '0' . $s;
+  }
+  # pad the file name with zeroes
+  my $rec = "";
+  if ( $r < 10 ) {
+      $rec = '000' . $r;
+  } elsif ( $r < 100 ) {
+      $rec = '00' . $r;
+  } elsif ( $r < 1000 ) {
+      $rec = '0' . $r;
+  }
+  my $rec_id = $spk . '_' . $rec;
+  if ( exists $prompts{$rec_id} ) {
+    print $T "$rec_id $prompts{$rec_id}\n";
+  } elsif ( defined $rec_id ) {
+    warn  "warning: problem\t$rec_id";
     next LINE;
   } else {
     croak "$line";
   }
 
-  print $O "$rid sox -r 22050 -e signed -b 16 $line -r 16000 -t wav - |\n";
-  print $U "$rid ${s}_a\n";
+  print $O "$rec_id sox -r 22050 -e signed -b 16 $line -r 16000 -t wav - |\n";
+  print $U "$rec_id $spk\n";
 }
 close $T;
 close $O;