diff --git a/egs/cmu_cslu_kids/README b/egs/cmu_cslu_kids/README new file mode 100644 index 00000000000..0b8512e2487 --- /dev/null +++ b/egs/cmu_cslu_kids/README @@ -0,0 +1,21 @@ +This is an ASR recipe for children speech using cmu_kids and cslu_kids. +Both of the corpora can be found on LDC: + - cmu_kids : https://catalog.ldc.upenn.edu/LDC97S63 + - cslu_kids: https://catalog.ldc.upenn.edu/LDC2007S18 + +To run this recipe, you'll need a copy of both corpora: + ./run.sh --cmu_kids --cslu_kids + +By default, this recipe will download an LM pretrained on LibriSpeech from +lm_url=www.openslr.org/resources/11. If you already have a copy of this LM +and do not wish to redownload, you can specify the LM path using the --lm_src option: + ./run.sh --cmu_kids --cslu_kids \ + --lm_src + +This recipe will also download and clean CMU_Dict by default. If you have a clean copy +already, or wish to use your own dictionary, simply copy your version of the dict to + data/local/dict + +To run extra features for triphone models or VLTN, set the following options true: + ./run.sh --cmu_kids --cslu_kids \ + --vtln true --extra_features true diff --git a/egs/cmu_cslu_kids/s5/cmd.sh b/egs/cmu_cslu_kids/s5/cmd.sh new file mode 100644 index 00000000000..179307556d5 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/cmd.sh @@ -0,0 +1,23 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. 
Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd=queue.pl +export decode_cmd="queue.pl --mem 2G" +# the use of cuda_cmd is deprecated, used only in 'nnet1', +export cuda_cmd="queue.pl --gpu 1" + +if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then + queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf, + export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2" + export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1" + export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G" +fi diff --git a/egs/cmu_cslu_kids/s5/conf/decode.config b/egs/cmu_cslu_kids/s5/conf/decode.config new file mode 100644 index 00000000000..10b0eee900b --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/decode.config @@ -0,0 +1,4 @@ +# Use wider-than-normal decoding beams for RM. +first_beam=16.0 +beam=20.0 +lattice_beam=10.0 diff --git a/egs/cmu_cslu_kids/s5/conf/decode_dnn.config b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config new file mode 100644 index 00000000000..e7cfca74763 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/decode_dnn.config @@ -0,0 +1,8 @@ +# In RM, the optimal decode LMWT is in range 2..5, which is different from usual 10..15 +# (it is caused by using simple rule-based LM, instead of n-gram LM), +scoring_opts="--min-lmwt 2 --max-lmwt 10" +# Still, it is better to use --acwt 0.1, both for decoding and sMBR, +acwt=0.1 +# For this small task we can afford to have large beams, +beam=30.0 # beam for decoding. Was 13.0 in the scripts. +lattice_beam=18.0 # this has most effect on size of the lattices. 
diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc.conf b/egs/cmu_cslu_kids/s5/conf/mfcc.conf new file mode 100644 index 00000000000..6bbcb763153 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--allow_downsample=true diff --git a/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..40f95e97010 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/mfcc_hires.conf @@ -0,0 +1,11 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600) +--allow-downsample=true diff --git a/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/cmu_cslu_kids/s5/conf/plp.conf b/egs/cmu_cslu_kids/s5/conf/plp.conf new file mode 100644 index 00000000000..e7e8a9e14af --- /dev/null +++ b/egs/cmu_cslu_kids/s5/conf/plp.conf @@ -0,0 +1,2 @@ +# No non-default options for now. 
+--allow_downsample=true diff --git a/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..8ee5db2326a --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/chain/compare_wer.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Num-params " +for x in $*; do + printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}') +done +echo diff --git a/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/chain/run_tdnnf.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh new file mode 100755 index 00000000000..8d124193584 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/chain/tdnnf_decode.sh @@ -0,0 +1,82 @@ +#! /bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Decode on new data set using trained model. +# The data directory should be prepared in kaldi style. +# Usage: +# ./local/chain/tdnnF_decode.sh --data_src + +set -euo pipefail +echo "$0 $@" + +stage=0 +decode_nj=10 +data_src= +affix= +tree_affix= +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat </dev/null || true + + ( + nspk=$(wc -l <$data_hires/spk2utt) + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nspk --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir $ivect_dir \ + $tree_dir/graph_tgsmall $data_hires ${dir}/decode_tgsmall_$data_name || exit 1 + + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_test_{tgsmall,tglarge} \ + $data_hires ${dir}/decode_{tgsmall,tglarge}_$data_name || exit 1 + ) || touch $dir/.error & + + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + diff --git a/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..51e0123d0f2 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# Copyright 2017-2018 Johns Hopkins University (author: Daniel Povey) +# 2017-2018 Yiming Wang +# 2019 Fei Wu + +# Based on material recipe for low-resource languages +# Factored TDNN with skip connectiong and splicing (two bottle neck layers) + +# WER results on dev +# Model LM Corpus WER(%) +# tdnn_1a tg_large Combined 11.72 +# tdnn_1a tg_small Combined 13.61 +# tdnn_1a tg_large CMU_Kids 17.26 +# tdnn_1a tg_small CMU_Kids 26.43 +# tdnn_1a tg_large CSLU_Kids 10.80 +# tdnn_1a tg_small CSLU_Kids 12.50 + +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp/: num-iters=342 nj=2..5 num-params=17.9M dim=40+100->3192 combine=-0.042->-0.041 (over 8) xent:train/valid[227,341,final]=(-0.451,-0.363,-0.346/-0.524,-0.466,-0.434) logprob:train/valid[227,341,final]=(-0.047,-0.043,-0.042/-0.058,-0.056,-0.054) + +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=10 +train_set=train +test_sets="test" +gmm=tri3 +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training chunk-options +chunk_width=140,100,160 +dropout_schedule='0,0@0.20,0.3@0.50,0' +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 8 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + +if [ $stage -le 11 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + opts="l2-regularize=0.004 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" + linear_opts="orthonormal-constraint=-1.0 l2-regularize=0.004" + output_opts="l2-regularize=0.002" + + mkdir -p $dir/configs + + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $opts dim=1024 + linear-component name=tdnn2l0 dim=256 $linear_opts input=Append(-1,0) + linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-dropout-layer name=tdnn2 $opts input=Append(0,1) dim=1024 + linear-component name=tdnn3l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-dropout-layer name=tdnn3 $opts dim=1024 input=Append(0,1) + linear-component name=tdnn4l0 dim=256 $linear_opts input=Append(-1,0) + linear-component name=tdnn4l dim=256 $linear_opts input=Append(0,1) + relu-batchnorm-dropout-layer name=tdnn4 $opts input=Append(0,1) dim=1024 + linear-component name=tdnn5l dim=256 $linear_opts + relu-batchnorm-dropout-layer name=tdnn5 $opts dim=1024 input=Append(0, tdnn3l) + linear-component 
name=tdnn6l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-dropout-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn7l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn7l dim=256 $linear_opts input=Append(0,3) + relu-batchnorm-dropout-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1024 + linear-component name=tdnn8l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn8l dim=256 $linear_opts input=Append(0,3) + relu-batchnorm-dropout-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn9l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn9l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-dropout-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn5l) dim=1024 + linear-component name=tdnn10l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn10l dim=256 $linear_opts input=Append(0,3) + relu-batchnorm-dropout-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=tdnn11l0 dim=256 $linear_opts input=Append(-3,0) + linear-component name=tdnn11l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-dropout-layer name=tdnn11 $opts input=Append(0,3,tdnn10l,tdnn9l,tdnn7l) dim=1024 + linear-component name=prefinal-l dim=256 $linear_opts + + relu-batchnorm-layer name=prefinal-chain input=prefinal-l $opts dim=1280 + linear-component name=prefinal-chain-l dim=256 $linear_opts + batchnorm-component name=prefinal-chain-batchnorm + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + relu-batchnorm-layer name=prefinal-xent input=prefinal-l $opts dim=1280 + linear-component name=prefinal-xent-l dim=256 $linear_opts + batchnorm-component name=prefinal-xent-batchnorm + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + +EOF + + 
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.0 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=20 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.002 \ + --trainer.optimization.final-effective-lrate=0.0002 \ + --trainer.num-chunk-per-minibatch=128,64 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 13 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). 
+ utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if [ $stage -le 14 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l tmp + cut -f 3- < tmp > out + + tr '[:lower:]' '[:upper:]' < out > tmp + tr -d '[:cntrl:]' < tmp > out + sent=$( out + tr '[:lower:]' '[:upper:]' < tmp > out + trans=$(> $data/$target/utt2spk + echo "$uttID $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 $utt|" >> $data/$target/wav.scp + echo "$spkID f" >> $data/$target/spk2gender + echo "$uttID $sent" >> $data/$target/text + fi + done + fi + fi +done + +for d in $data/train $data/test; do + utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt + utils/fix_data_dir.sh $d +done + +printf "\t total: %s; train: %s; test: %s.\n" "$total_cnt" "$train_cnt" "$test_cnt" +rm -f out tmp + +# Optional +# Get data duration, just for book keeping +# for data in $data/train $data/test; do +# ./local/data_duration.sh $data +# done +# + diff --git a/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh new file mode 100755 index 00000000000..735f87eca9f --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/cslu_aud_prep.sh @@ -0,0 +1,43 @@ +#/bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Called by local/cslu_DataPrep.shi + +Assignment() +{ + rnd=$((1+RANDOM % 100)) + if [ $rnd -le $test_percentage ]; then + target="test" + else + target="train" + fi +} +audio= +test_percentage=30 # Percent of data reserved as test set +debug=debug/cslu_dataprep_debug +data=data/data_cslu +. 
./utils/parse_options.sh + +uttID=$(basename $audio) +uttID=${uttID%'.wav'} +sentID=${uttID: -3} +spkID=${uttID%$sentID} +sentID=${sentID%"0"} +sentID=$(echo "$sentID" | tr '[:lower:]' '[:upper:]' ) + +line=$(grep $sentID cslu/docs/all.map) + +if [ -z "$line" ]; then # Can't map utterance to transcript + echo $audio $sentID >> $debug +else + txt=$(echo $line | grep -oP '"\K.*?(?=")') + cap_txt=${txt^^} + Assignment + echo "$uttID $cap_txt" >> $data/$target/text + echo "$uttID $spkID" >> $data/$target/utt2spk + echo "$spkID f" >> $data/$target/spk2gender + echo "$uttID $audio" >> $data/$target/wav.scp +fi + diff --git a/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh new file mode 100755 index 00000000000..621179079b3 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/cslu_prepare_data.sh @@ -0,0 +1,49 @@ +#! /bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Prepares cslu_kids +# Should be run from egs/cmu_csli_kids + +set -e +Looper() +{ + # echo "Looping through $1" + for f in $1/*; do + if [ -d $f ]; then + Looper $f + else + ./local/cslu_aud_prep.sh --data $data --audio $f + fi + done +} + +data=data/data_cslu +corpus=cslu +. ./utils/parse_options.sh + +rm -f debug/cslu_dataprep_debug +mkdir -p debug +# File check, remove previous data and features files +for d in $data/test $data/train; do + mkdir -p $d + ./local/file_check.sh $d +done + +echo "Preparing cslu_kids..." +Looper $corpus/speech/scripted + +for d in $data/test $data/train; do + ./utils/utt2spk_to_spk2utt.pl $d + ./utils/fix_data_dir.sh $d +done +if [ -f debug/cslu_dataprep_debug ]; then + echo "Missing transcripts for some utterances. 
See cslu_dataprep_debug" +fi + +# Optional +# Get data duration, just for book keeping +# for data in data/data_cslu/test data/data_cslu/train; do +# ./local/data_duration.sh $data +# done diff --git a/egs/cmu_cslu_kids/s5/local/data_duration.sh b/egs/cmu_cslu_kids/s5/local/data_duration.sh new file mode 100755 index 00000000000..e838e365ea7 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/data_duration.sh @@ -0,0 +1,19 @@ +#! /bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Get duration of the utterance given data dir +set -eu +echo $0 $@ + +data_dir=$1 +mkdir -p duration + +./utils/data/get_utt2dur.sh $data_dir + +echo "$data_dir" +python local/sum_duration.py $data_dir/utt2dur +echo "" + + diff --git a/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh new file mode 100755 index 00000000000..0248dd0cae1 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/download_cmu_dict.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright 2019 Fei Wu +set -eu +# Adapted from the local/prepare_dict script in +# the librispeech recipe. Download and prepare CMU_dict. +# For childresn speech ASR tasks, since the vocabulary in cmu_kids and +# cslu_kids is relatively easy comparing to librispeech, we use only the +# CMU_dict, and do not handle OOV with G2P. +# Should be run from egs/cmu_cslu_kids. +# Usage: +# local/download_cmu_dict.sh --dict_dir + +dict_dir=data/local/dict +OOV="" + +. ./utils/parse_options.sh || exit 1; +. ./path.sh || exit 1 + +if [ ! -d $dict_dir ]; then + echo "Downloading and preparing CMU dict" + svn co -r 12440 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dict_dir/raw_dict || exit 1; + + echo "Removing the pronunciation variant markers ..." 
+ grep -v ';;;' $dict_dir/raw_dict/cmudict.0.7a | \ + perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; print; }' | \ + sort -u > $dict_dir/lexicon.txt || exit 1; + + tr -d '\r' < $dict_dir/raw_dict/cmudict.0.7a.symbols > $dict_dir/nonsilence_phones.txt + + echo "$OOV SIL" >> $dict_dir/lexicon.txt + + echo "SIL" > $dict_dir/silence_phones.txt + echo "SPN" >> $dict_dir/silence_phones.txt + echo "SIL" > $dict_dir/optional_silence.txt + + rm -rf $dict_dir/raw_dict +fi diff --git a/egs/cmu_cslu_kids/s5/local/download_lm.sh b/egs/cmu_cslu_kids/s5/local/download_lm.sh new file mode 100755 index 00000000000..382f313df7c --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/download_lm.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# Copyright 2014 Vassil Panayotov +# Apache 2.0 + +if [ $# -ne "2" ]; then + echo "Usage: $0 " + echo "e.g.: $0 http://www.openslr.org/resources/11 data/local/lm" + exit 1 +fi + +base_url=$1 +dst_dir=$2 + +# given a filename returns the corresponding file size in bytes +# The switch cases below can be autogenerated by entering the data directory and running: +# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done +function filesize() { + case $1 in + "3-gram.arpa.gz") echo "759636181";; + "3-gram.pruned.1e-7.arpa.gz") echo "34094057";; + "3-gram.pruned.3e-7.arpa.gz") echo "13654242";; + "4-gram.arpa.gz") echo "1355172078";; + "g2p-model-5") echo "20098243";; + "librispeech-lexicon.txt") echo "5627653";; + "librispeech-lm-corpus.tgz") echo "1803499244";; + "librispeech-lm-norm.txt.gz") echo "1507274412";; + "librispeech-vocab.txt") echo "1737588";; + *) echo "";; + esac +} + +function check_and_download () { + [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; } + fname=$1 + echo "Downloading file '$fname' into '$dst_dir'..." + expect_size="$(filesize $fname)" + [[ ! 
-z "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; } + if [[ -s $dst_dir/$fname ]]; then + # In the following statement, the first version works on linux, and the part + # after '||' works on Linux. + f=$dst_dir/$fname + fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + if [[ "$fsize" -eq "$expect_size" ]]; then + echo "'$fname' already exists and appears to be complete" + return 0 + else + echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..." + fi + fi + wget --no-check-certificate -O $dst_dir/$fname $base_url/$fname || { + echo "Error while trying to download $fname!" + return 1 + } + f=$dst_dir/$fname + # In the following statement, the first version works on linux, and the part after '||' + # works on Linux. + fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; } + return 0 +} + +mkdir -p $dst_dir + +for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz 4-gram.arpa.gz \ + g2p-model-5 librispeech-lm-corpus.tgz librispeech-vocab.txt librispeech-lexicon.txt; do + check_and_download $f || exit 1 +done + +cd $dst_dir +ln -sf 3-gram.pruned.1e-7.arpa.gz lm_tgmed.arpa.gz +ln -sf 3-gram.pruned.3e-7.arpa.gz lm_tgsmall.arpa.gz +ln -sf 3-gram.arpa.gz lm_tglarge.arpa.gz +ln -sf 4-gram.arpa.gz lm_fglarge.arpa.gz + +exit 0 diff --git a/egs/cmu_cslu_kids/s5/local/file_check.sh b/egs/cmu_cslu_kids/s5/local/file_check.sh new file mode 100755 index 00000000000..859f228058a --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/file_check.sh @@ -0,0 +1,17 @@ +#! 
/bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + + +printf "\t File Check in folder: %s.\n" "$1" + +WavScp="$1/wav.scp" +Text="$1/text" +Utt2Spk="$1/utt2spk" +Gend="$1/utt2gender" +Spk2Utt="$1/spk2utt" +rm -f $WavScp $Text $Utt2Spk $Gend $Spk2Utt + + + diff --git a/egs/cmu_cslu_kids/s5/local/format_lms.sh b/egs/cmu_cslu_kids/s5/local/format_lms.sh new file mode 100755 index 00000000000..b530f61d2d9 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/format_lms.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2014 Vassil Panayotov +# Apache 2.0 + +# Prepares the test time language model(G) transducers +# (adapted from wsj/s5/local/wsj_format_data.sh) + +. ./path.sh || exit 1; + +# begin configuration section +src_dir=data/lang +# end configuration section + +. utils/parse_options.sh || exit 1; + +set -e + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a15/vpanayotov/data/lm" + echo ", where:" + echo " is the directory in which the language model is stored/downloaded" + echo "Options:" + echo " --src-dir # source lang directory, default data/lang" + exit 1 +fi + +lm_dir=$1 + +if [ ! -d $lm_dir ]; then + echo "$0: expected source LM directory $lm_dir to exist" + exit 1; +fi +if [ ! -f $src_dir/words.txt ]; then + echo "$0: expected $src_dir/words.txt to exist." + exit 1; +fi + + +tmpdir=data/local/lm_tmp.$$ +trap "rm -r $tmpdir" EXIT + +mkdir -p $tmpdir + +for lm_suffix in tgsmall tgmed; do + # tglarge is prepared by a separate command, called from run.sh; we don't + # want to compile G.fst for tglarge, as it takes a while. + test=${src_dir}_test_${lm_suffix} + mkdir -p $test + cp -r ${src_dir}/* $test + gunzip -c $lm_dir/lm_${lm_suffix}.arpa.gz | \ + arpa2fst --disambig-symbol=#0 \ + --read-symbol-table=$test/words.txt - $test/G.fst + utils/validate_lang.pl --skip-determinization-check $test || exit 1; +done + +echo "Succeeded in formatting data." 
+ +exit 0 diff --git a/egs/cmu_cslu_kids/s5/local/make_lm.pl b/egs/cmu_cslu_kids/s5/local/make_lm.pl new file mode 100755 index 00000000000..80eea5a6198 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/make_lm.pl @@ -0,0 +1,119 @@ +#!/usr/bin/env perl + +# Copyright 2010-2011 Yanmin Qian Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This file takes as input the file wp_gram.txt that comes with the RM +# distribution, and creates the language model as an acceptor in FST form. 
+ +# make_rm_lm.pl wp_gram.txt > G.txt + +if (@ARGV != 1) { + print "usage: make_rm_lm.pl wp_gram.txt > G.txt\n"; + exit(0); +} +unless (open(IN_FILE, "@ARGV[0]")) { + die ("can't open @ARGV[0]"); +} + + +$flag = 0; +$count_wrd = 0; +$cnt_ends = 0; +$init = ""; + +while ($line = ) +{ + chop($line); # Return the last char + + $line =~ s/ //g; # Selete all spaces + + if(($line =~ /^>/)) # If line has ">" + { + if($flag == 0) # Flip flag + { + $flag = 1; + } + $line =~ s/>//g; # Delete ">" + $hashcnt{$init} = $i; + $init = $line; + $i = 0; + $count_wrd++; + @LineArray[$count_wrd - 1] = $init; + $hashwrd{$init} = 0; + } + elsif($flag != 0) + { + + $hash{$init}[$i] = $line; + $i++; + if($line =~ /SENTENCE-END/) + { + $cnt_ends++; + } + } + else + {} +} + +$hashcnt{$init} = $i; + +$num = 0; +$weight = 0; +$init_wrd = "SENTENCE-END"; +$hashwrd{$init_wrd} = @LineArray; +for($i = 0; $i < $hashcnt{$init_wrd}; $i++) +{ + $weight = -log(1/$hashcnt{$init_wrd}); + $hashwrd{$hash{$init_wrd}[$i]} = $i + 1; + print "0 $hashwrd{$hash{$init_wrd}[$i]} $hash{$init_wrd}[$i] $hash{$init_wrd}[$i] $weight\n"; +} +$num = $i; + +for($i = 0; $i < @LineArray; $i++) +{ + if(@LineArray[$i] eq 'SENTENCE-END') + {} + else + { + if($hashwrd{@LineArray[$i]} == 0) + { + $num++; + $hashwrd{@LineArray[$i]} = $num; + } + for($j = 0; $j < $hashcnt{@LineArray[$i]}; $j++) + { + $weight = -log(1/$hashcnt{@LineArray[$i]}); + if($hashwrd{$hash{@LineArray[$i]}[$j]} == 0) + { + $num++; + $hashwrd{$hash{@LineArray[$i]}[$j]} = $num; + } + if($hash{@LineArray[$i]}[$j] eq 'SENTENCE-END') + { + print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $weight\n" + } + else + { + print "$hashwrd{@LineArray[$i]} $hashwrd{$hash{@LineArray[$i]}[$j]} $hash{@LineArray[$i]}[$j] $hash{@LineArray[$i]}[$j] $weight\n"; + } + } + } +} + +print "$hashwrd{$init_wrd} 0\n"; +close(IN_FILE); + + diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh new file mode 
100755 index 00000000000..095e85cc338 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/nnet3/compare_wer.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..c695f2c9f1c --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train +test_sets="test" +gmm=tri3b + +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. 
+ utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l 2041 combine=-0.47->-0.38 loglike:train/valid[20,31,combined]=(-0.62,-0.38,-0.37/-1.03,-1.03,-1.02) accuracy:train/valid[20,31,combined]=(0.79,0.87,0.87/0.70,0.72,0.72) + +# Below, comparing with the chain TDNN system. It's a little better with the +# small-vocab decoding. Both systems are probably super-badly tuned, and the +# chain system probably used too many jobs. +# +# local/nnet3/compare_wer.sh exp/chain/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp +# System tdnn1a_sp tdnn_lstm1a_sp +#WER dev_clean_2 (tgsmall) 18.43 17.37 +#WER dev_clean_2 (tglarge) 13.15 13.43 +# Final train prob -0.3933 +# Final valid prob -0.9662 +# Final train acc 0.8652 +# Final valid acc 0.7206 + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
+affix=1a # affix for the TDNN directory name +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ 
$(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 2041 combine=-0.71->-0.58 loglike:train/valid[20,31,combined]=(-2.78,-0.95,-0.57/-2.94,-1.31,-0.98) accuracy:train/valid[20,31,combined]=(0.48,0.75,0.81/0.45,0.67,0.71) + +# local/nnet3/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +#WER dev_clean_2 (tgsmall) 17.67 17.01 +# [online:] 18.06 17.26 +#WER dev_clean_2 (tglarge) 13.43 12.63 
+# [online:] 13.73 12.94 +# Final train prob -0.3660 -0.5680 +# Final valid prob -1.0236 -0.9771 +# Final train acc 0.8737 0.8067 +# Final valid acc 0.7222 0.7144 + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1b # affix for the TDNN+LSTM directory name +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 2041 combine=-0.99->-0.81 loglike:train/valid[20,31,combined]=(-1.22,-0.69,-0.61/-1.34,-1.02,-0.91) accuracy:train/valid[20,31,combined]=(0.68,0.779,0.800/0.64,0.70,0.724) + + + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1c # affix for the TDNN+LSTM directory name +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-batchnorm-layer name=tdnn1 dim=520 $tdnn_opts + relu-batchnorm-layer name=tdnn2 dim=520 $tdnn_opts input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-batchnorm-layer name=tdnn3 dim=520 $tdnn_opts input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn4 dim=520 $tdnn_opts input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-batchnorm-layer name=tdnn5 dim=520 $tdnn_opts input=Append(-3,0,3) + relu-batchnorm-layer name=tdnn6 dim=520 $tdnn_opts input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 
recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + + output-layer name=output input=lstm3 $output_opts output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l data/lang/G.fst || exit 1; + +# Checking that G is 
stochastic [note, it wouldn't be for an Arpa] +fstisstochastic data/lang/G.fst || echo Error: G is not stochastic + +# Checking that G.fst is determinizable. +fstdeterminize data/lang/G.fst /dev/null || echo Error determinizing G. + +# Checking that L_disambig.fst is determinizable. +fstdeterminize data/lang/L_disambig.fst /dev/null || echo Error determinizing L. + +# Checking that disambiguated lexicon times G is determinizable +fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \ + fstdeterminize >/dev/null || echo Error + +# Checking that LG is stochastic: +fsttablecompose data/lang/L.fst data/lang/G.fst | \ + fstisstochastic || echo Error: LG is not stochastic. + +# Checking that L_disambig.G is stochastic: +fsttablecompose data/lang/L_disambig.fst data/lang/G.fst | \ + fstisstochastic || echo Error: LG is not stochastic. + +echo "Succeeded preparing grammar for CMU_kids." diff --git a/egs/cmu_cslu_kids/s5/local/score.sh b/egs/cmu_cslu_kids/s5/local/score.sh new file mode 100755 index 00000000000..c812199fc98 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/score.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# 2014 Guoguo Chen +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +word_ins_penalty=0.0,0.5,1.0 +min_lmwt=7 +max_lmwt=17 +iter=final +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." 
+ echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/egs/cmu_cslu_kids/s5/local/sort_result.sh b/egs/cmu_cslu_kids/s5/local/sort_result.sh new file mode 100755 index 00000000000..aedec9dc344 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/sort_result.sh @@ -0,0 +1,46 @@ +#! /bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Sorts and reports results in results/results.txt +# for all models in exp. Expects decode directories +# to be named as exp//decode* or exp/chain/tdnn*/decode* +# Should be run from egs/cmu_cslu_kids. 
+ +res=${1:-"results/results.txt"} +exp=exp +mkdir -p results +rm -f $res + +echo "Sorting results in: " +echo "# ---------- GMM-HMM Models ----------" >> $res +for mdl in $exp/mono* $exp/tri*; do + echo " $mdl" + if [ -d $mdl ];then + for dec in $mdl/decode*;do + echo " $dec" + if [ -d $dec ];then + grep WER $dec/wer* | \ + sort -k2 -n > $dec/WERs + head -n 1 $dec/WERs >> $res + fi + done + fi +done + +echo "# ---------- DNN-HMM Models ----------" >> $res +# DNN results +for mdl in $exp/chain/tdnn*; do + echo " $mdl" + for dec in $mdl/decode*; do + if [ -d $dec ]; then + echo " $dec" + grep WER $dec/wer* | \ + sort -k2 -n > $dec/WERs + head -n 1 $dec/WERs >> $res + fi + done +done + +sed -i "s/:/ /g" $res diff --git a/egs/cmu_cslu_kids/s5/local/subset_dataset.sh b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh new file mode 100755 index 00000000000..050128247a4 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/subset_dataset.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez) +# Apache 2.0 + +# The following commands were used to generate the mini_librispeech dataset: +# +# Note that data generation is random. This could be fixed by +# providing a seed argument to the shuf program. + +if [ "$#" -ne 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\ + /export/a05/dgalvez/LibriSpeech/train-clean-5 5" + exit 1 +fi + +src_dir=$1 +dest_dir=$2 +dest_num_hours=$3 + +src=$(basename $src_dir) +dest=$(basename $dest_dir) +librispeech_dir=$(dirname $src_dir) + +# TODO: Possibly improve this to ensure gender balance and speaker +# balance. 
+# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data +src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \ +python -c ' +from __future__ import print_function +from sys import stdin +minutes_str = stdin.read().split() +print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))') +src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | sort -u | wc -l) +mkdir -p data/subset_tmp +grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | \ + shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \ + data/subset_tmp/${dest}_chapter_id_list.txt + +while read -r chapter_id || [[ -n "$chapter_id" ]]; do + chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d) + speaker_id=$(basename $(dirname $chapter_dir)) + mkdir -p $dest_dir/$speaker_id/ + cp -r $chapter_dir $dest_dir/$speaker_id/ +done < data/subset_tmp/${dest}_chapter_id_list.txt diff --git a/egs/cmu_cslu_kids/s5/local/sum_duration.py b/egs/cmu_cslu_kids/s5/local/sum_duration.py new file mode 100644 index 00000000000..0af7ba62151 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/sum_duration.py @@ -0,0 +1,15 @@ +# Sum duration obtained by using +# utils/data/get_utt2dur.sh + +import sys +file = sys.argv[1] +sum = 0 +with open(file, 'r') as fp: + line = fp.readline() + while(line): + toks = line.strip().split() + sum += float(toks[1]) + line = fp.readline() +fp.close() +h=sum/3600 +sys.stdout.write("%f hour data.\n"%h) diff --git a/egs/cmu_cslu_kids/s5/local/train_lms.sh b/egs/cmu_cslu_kids/s5/local/train_lms.sh new file mode 100755 index 00000000000..0807210be18 --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/train_lms.sh @@ -0,0 +1,217 @@ +#!/bin/bash + +# This script trains LMs on the WSJ LM-training data. 
+# It requires that you have already run wsj_extend_dict.sh, +# to get the larger-size dictionary including all of CMUdict +# plus any OOVs and possible acronyms that we could easily +# derive pronunciations for. + +dict_suffix= + +echo "$0 $@" # Print the command line for logging +. utils/parse_options.sh || exit 1; + +dir=data/local/local_lm +srcdir=data/local/dict${dict_suffix}_larger +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH +( # First make sure the kaldi_lm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d kaldi_lm ]; then + echo Not installing the kaldi_lm toolkit since it is already there. + else + echo Downloading and installing the kaldi_lm tools + if [ ! -f kaldi_lm.tar.gz ]; then + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; + fi + tar -xvzf kaldi_lm.tar.gz || exit 1; + cd kaldi_lm + make || exit 1; + echo Done making the kaldi_lm tools + fi +) || exit 1; + + + +if [ ! -f $srcdir/cleaned.gz -o ! -f $srcdir/lexicon.txt ]; then + echo "Expecting files $srcdir/cleaned.gz and $srcdir/lexicon.txt to exist"; + echo "You need to run local/wsj_extend_dict.sh before running this script." + exit 1; +fi + +# Get a wordlist-- keep everything but silence, which should not appear in +# the LM. +awk '{print $1}' $srcdir/lexicon.txt | grep -v -w '!SIL' > $dir/wordlist.txt + +# Get training data with OOV words (w.r.t. our current vocab) replaced with . +echo "Getting training data with OOV words replaced with (train_nounk.gz)" +gunzip -c $srcdir/cleaned.gz | awk -v w=$dir/wordlist.txt \ + 'BEGIN{while((getline0) v[$1]=1;} + {for (i=1;i<=NF;i++) if ($i in v) printf $i" ";else printf " ";print ""}'|sed 's/ $//g' \ + | gzip -c > $dir/train_nounk.gz + +# Get unigram counts (without bos/eos, but this doens't matter here, it's +# only to get the word-map, which treats them specially & doesn't need their +# counts). 
+# Add a 1-count for each word in word-list by including that in the data, +# so all words appear. +gunzip -c $dir/train_nounk.gz | cat - $dir/wordlist.txt | \ + awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \ + sort -nr > $dir/unigram.counts + +# Get "mapped" words-- a character encoding of the words that makes the common words very short. +cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "" "" "" > $dir/word_map + +gunzip -c $dir/train_nounk.gz | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1]=$2;} + { for(n=1;n<=NF;n++) { printf map[$n]; if(n$dir/train.gz + +# To save disk space, remove the un-mapped training data. We could +# easily generate it again if needed. +rm $dir/train_nounk.gz + +train_lm.sh --arpa --lmtype 3gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826 +# 7.8 million N-grams. + +prune_lm.sh --arpa 6.0 $dir/3gram-mincount/ +# 1.45 million N-grams. +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139 + +train_lm.sh --arpa --lmtype 4gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180 +# 10.3 million N-grams. + +prune_lm.sh --arpa 7.0 $dir/4gram-mincount +# 1.50 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757 + + +exit 0 + +### Below here, this script is showing various commands that +## were run during LM tuning. + +train_lm.sh --arpa --lmtype 3gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 141.444826 +# 7.8 million N-grams. + +prune_lm.sh --arpa 3.0 $dir/3gram-mincount/ +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 156.408740 +# 2.5 million N-grams. + +prune_lm.sh --arpa 6.0 $dir/3gram-mincount/ +# 1.45 million N-grams. 
+# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 165.394139 + +train_lm.sh --arpa --lmtype 4gram-mincount $dir +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 126.734180 +# 10.3 million N-grams. + +prune_lm.sh --arpa 3.0 $dir/4gram-mincount +#Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 143.206294 +# 2.6 million N-grams. + +prune_lm.sh --arpa 4.0 $dir/4gram-mincount +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 146.927717 +# 2.15 million N-grams. + +prune_lm.sh --arpa 5.0 $dir/4gram-mincount +# 1.86 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 150.162023 + +prune_lm.sh --arpa 7.0 $dir/4gram-mincount +# 1.50 million N-grams +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 155.663757 + +train_lm.sh --arpa --lmtype 3gram $dir +# Perplexity over 228518.000000 words (excluding 478.000000 OOVs) is 135.692866 +# 20.0 million N-grams + +! which ngram-count \ + && echo "SRILM tools not installed so not doing the comparison" && exit 1; + +################# +# You could finish the script here if you wanted. +# Below is to show how to do baselines with SRILM. +# You'd have to install the SRILM toolkit first. + +heldout_sent=10000 # Don't change this if you want result to be comparable with + # kaldi_lm results +sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities. 
+mkdir -p $sdir +gunzip -c $srcdir/cleaned.gz | head -$heldout_sent > $sdir/cleaned.heldout +gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent > $sdir/cleaned.train +(echo ""; echo "" ) | cat - $dir/wordlist.txt > $sdir/wordlist.final.s + +# 3-gram: +ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/cleaned.heldout # consider -debug 2 +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -491456 ppl= 141.457 ppl1= 177.437 + +# Trying 4-gram: +ngram-count -text $sdir/cleaned.train -order 4 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o4g.kn.gz +ngram -order 4 -lm $sdir/srilm.o4g.kn.gz -ppl $sdir/cleaned.heldout +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -480939 ppl= 127.233 ppl1= 158.822 + +#3-gram with pruning: +ngram-count -text $sdir/cleaned.train -order 3 -limit-vocab -vocab $sdir/wordlist.final.s -unk \ + -prune 0.0000001 -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.pr7.kn.gz +ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 478 OOVs +#0 zeroprobs, logprob= -510828 ppl= 171.947 ppl1= 217.616 +# Around 2.25M N-grams. +# Note: this is closest to the experiment done with "prune_lm.sh --arpa 3.0 $dir/3gram-mincount/" +# above, which gave 2.5 million N-grams and a perplexity of 156. + +# Note: all SRILM experiments above fully discount all singleton 3 and 4-grams. +# You can use -gt3min=0 and -gt4min=0 to stop this (this will be comparable to +# the kaldi_lm experiments above without "-mincount". + +## From here is how to train with +# IRSTLM. This is not really working at the moment. 
+ +if [ -z $IRSTLM ] ; then + export IRSTLM=$KALDI_ROOT/tools/irstlm/ +fi +export PATH=${PATH}:$IRSTLM/bin +if ! command -v prune-lm >/dev/null 2>&1 ; then + echo "$0: Error: the IRSTLM is not available or compiled" >&2 + echo "$0: Error: We used to install it by default, but." >&2 + echo "$0: Error: this is no longer the case." >&2 + echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2 + echo "$0: Error: and run extras/install_irstlm.sh" >&2 + exit 1 +fi + +idir=$dir/irstlm +mkdir $idir +gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | add-start-end.sh | \ + gzip -c > $idir/train.gz + +dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no + cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\ +{print $0;}}' > vocab.irstlm.20k + + +build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \ + -n 3 -s improved-kneser-ney -b yes +# Testing perplexity with SRILM tools: +ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout +#data/local/local_lm/irstlm/lm_3gram.gz: line 162049: warning: non-zero probability for in closed-vocabulary LM +#file data/local/local_lm/srilm/cleaned.heldout: 10000 sentences, 218996 words, 0 OOVs +#0 zeroprobs, logprob= -513670 ppl= 175.041 ppl1= 221.599 + +# Perplexity is very bad (should be ~141, since we used -p option, +# not 175), +# but adding -debug 3 to the command line shows that +# the IRSTLM LM does not seem to sum to one properly, so it seems that +# it produces an LM that isn't interpretable in the normal way as an ARPA +# LM. + + + diff --git a/egs/cmu_cslu_kids/s5/local/vtln.sh b/egs/cmu_cslu_kids/s5/local/vtln.sh new file mode 100755 index 00000000000..0ca179ce89f --- /dev/null +++ b/egs/cmu_cslu_kids/s5/local/vtln.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# Copyright Johns Hopkins University +# 2019 Fei Wu + +# Run VTLN. This will be run if the vtln option +# is set to be true in run.sh. 
+
+set -eu
+stage=0
+featdir=mfcc/vtln
+data=data
+mdl=exp/tri3
+mdl_vtln=${mdl}_vtln
+vtln_lda=exp/tri4
+vtln_sat=exp/tri5
+
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+mkdir -p $featdir
+
+steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 $data/train $data/lang $mdl $mdl_vtln
+
+if [ $stage -le 0 ]; then
+    mkdir -p $data/train_vtln
+    cp $data/train/* $data/train_vtln || true
+    cp $mdl_vtln/final.warp $data/train_vtln/spk2warp
+    steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/train_vtln exp/make_mfcc/train_vtln $featdir
+    steps/compute_cmvn_stats.sh $data/train_vtln exp/make_mfcc/train_vtln $featdir
+fi
+
+if [ $stage -le 1 ]; then
+    utils/mkgraph.sh $data/lang_test_tgmed $mdl_vtln $mdl_vtln/graph
+    steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+        $mdl_vtln/graph $data/test $mdl_vtln/decode
+fi
+
+if [ $stage -le 2 ]; then
+    mkdir -p $data/test_vtln
+    cp $data/test/* $data/test_vtln || true
+    cp $mdl_vtln/decode/final.warp $data/test_vtln/spk2warp
+    steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" $data/test_vtln exp/make_mfcc/test_vtln $featdir
+    steps/compute_cmvn_stats.sh $data/test_vtln exp/make_mfcc/test_vtln $featdir
+fi
+
+if [ $stage -le 3 ]; then
+    steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 \
+        $data/train_vtln $data/lang $mdl_vtln $vtln_lda
+    utils/mkgraph.sh $data/lang_test_tgmed $vtln_lda $vtln_lda/graph
+    echo "$mdl_vtln + lda + mllt" > $vtln_lda/model_discription
+    steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+        $vtln_lda/graph $data/test_vtln $vtln_lda/decode
+fi
+
+if [ $stage -le 4 ]; then
+    steps/train_sat.sh 1800 9000 $data/train_vtln $data/lang $vtln_lda $vtln_sat
+    utils/mkgraph.sh $data/lang_test_tgmed $vtln_sat $vtln_sat/graph
+    steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" $vtln_sat/graph $data/test_vtln $vtln_sat/decode
+    echo "$mdl_vtln + lda + mllt + SAT" > $vtln_sat/model_discription
+fi
diff --git a/egs/cmu_cslu_kids/s5/path.sh b/egs/cmu_cslu_kids/s5/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/cmu_cslu_kids/s5/run.sh b/egs/cmu_cslu_kids/s5/run.sh
new file mode 100755
index 00000000000..43ae1ea9426
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/run.sh
@@ -0,0 +1,177 @@
+#! /bin/bash
+
+# Copyright Johns Hopkins University
+# 2019 Fei Wu
+
+set -eo pipefail
+
+stage=0
+cmu_kids= # path to cmu_kids corpus
+cslu_kids= # path to cslu_kids corpus
+lm_src= # path of existing librispeech lm
+extra_features=false # Extra features for GMM model (MMI, boosting and MPE)
+vtln=false # Optional, run VTLN on gmm and tdnnf models if set true
+email= # Reporting email for tdnn-f training
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+lm_url=www.openslr.org/resources/11
+mkdir -p data
+mkdir -p data/local
+
+# Prepare data
+if [ $stage -le 0 ]; then
+    # Make soft link to the corpora
+    if [ ! -e cmu_kids ]; then
+        ln -sf $cmu_kids cmu_kids
+    fi
+    if [ ! -e cslu ]; then
+        ln -sf $cslu_kids cslu
+    fi
+
+    # Make softlink to lm, if lm_src provided
+    if [ ! -z "$lm_src" ] && [ !
-e data/local/lm ] ; then + ln -sf $lm_src data/local/lm + fi + + # Remove old data dirs + rm -rf data/data_cmu + rm -rf data/data_cslu + + # Data Prep + ./local/cmu_prepare_data.sh --corpus cmu_kids/kids --data data/data_cmu + ./local/cslu_prepare_data.sh --corpus cslu --data data/data_cslu +fi + +# Combine data +if [ $stage -le 1 ]; then + mkdir -p data/train + mkdir -p data/test + rm -rf data/train/* + rm -rf data/test/* + ./utils/combine_data.sh data/train data/data_cmu/train data/data_cslu/train + ./utils/combine_data.sh data/test data/data_cmu/test data/data_cslu/test +fi + +# LM, WFST Preparation +if [ $stage -le 2 ]; then + if [ ! -d data/local/dict ]; then + ./local/download_cmu_dict.sh + fi + + if [ ! -e data/local/lm ]; then + echo "lm_src not provided. Downloading lm from openslr." + ./local/download_lm.sh $lm_url data/local/lm + fi + + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang + local/format_lms.sh --src_dir data/lang data/local/lm + + # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs + utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge + utils/build_const_arpa_lm.sh data/local/lm/lm_fglarge.arpa.gz data/lang data/lang_test_fglarge +fi + +# Make MFCC features +if [ $stage -le 3 ]; then + mkdir -p mfcc + mkdir -p exp + steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/test exp/make_feat/test mfcc + steps/compute_cmvn_stats.sh data/test exp/make_feat/test mfcc + steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" data/train exp/make_feat/train mfcc + steps/compute_cmvn_stats.sh data/train exp/make_feat/train mfcc +fi + +# Mono-phone +if [ $stage -le 4 ]; then + # Train + steps/train_mono.sh --nj 40 --cmd "$train_cmd" data/train data/lang exp/mono + #Decode + utils/mkgraph.sh data/lang_test_tgsmall exp/mono exp/mono/graph + steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode + #Align + steps/align_si.sh --nj 
20 --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali
+fi
+
+# Tri1 [Vanilla tri phone model]
+if [ $stage -le 5 ]; then
+    # Train
+    steps/train_deltas.sh --cmd "$train_cmd" 1800 9000 data/train data/lang exp/mono_ali exp/tri1
+    # Decode
+    utils/mkgraph.sh data/lang_test_tgmed exp/tri1 exp/tri1/graph
+    steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode
+    # Align - make graph - decode again
+    steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri1 exp/tri1_ali
+    utils/mkgraph.sh data/lang_test_tgmed exp/tri1_ali exp/tri1_ali/graph
+    steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri1_ali/graph data/test exp/tri1_ali/decode
+fi
+
+# Add LDA and MLLT
+if [ $stage -le 6 ]; then
+    # Train
+    steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" 1800 9000 data/train data/lang exp/tri1_ali exp/tri2
+    utils/mkgraph.sh data/lang_test_tgmed exp/tri2 exp/tri2/graph
+    # Decode
+    steps/decode.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode
+    # Align - make graph - decode again
+    steps/align_si.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri2 exp/tri2_ali
+    utils/mkgraph.sh data/lang_test_tgmed exp/tri2_ali exp/tri2_ali/graph
+    steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri2_ali/graph data/test exp/tri2_ali/decode
+fi
+
+# Add other features
+if [ $stage -le 7 ]; then
+    if [ $extra_features = true ]; then
+        # Add MMI
+        steps/make_denlats.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/tri2 exp/tri2_denlats
+        steps/train_mmi.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi
+        steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it4
+        steps/decode.sh --config
conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi/decode_it3 + + # Add Boosting + steps/train_mmi.sh --boost 0.05 data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mmi_b0.05 + steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it4 + steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mmi_b0.05/decode_it3 + + # Add MPE + steps/train_mpe.sh data/train data/lang exp/tri2_ali exp/tri2_denlats exp/tri2_mpe + steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it4 + steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2_mpe/decode_it3 + fi +fi + +# Add SAT +if [ $stage -le 8 ]; then + # Do LDA+MLLT+SAT, and decode. + steps/train_sat.sh 1800 9000 data/train data/lang exp/tri2_ali exp/tri3 + utils/mkgraph.sh data/lang_test_tgmed exp/tri3 exp/tri3/graph + steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode +fi + +if [ $stage -le 9 ]; then + # Align all data with LDA+MLLT+SAT system (tri3) + steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" --use-graphs true data/train data/lang_test_tgmed exp/tri3 exp/tri3_ali + utils/mkgraph.sh data/lang_test_tgmed exp/tri3_ali exp/tri3_ali/graph + steps/decode_fmllr.sh --config conf/decode.config --nj 40 --cmd "$decode_cmd" exp/tri3_ali/graph data/test exp/tri3_ali/decode +fi + +if [ $stage -le 10 ]; then + # Uncomment reporting email option to get training progress updates by email + ./local/chain/run_tdnnf.sh --train_set train \ + --test_sets test --gmm tri3 # --reporting_email $email +fi + + +# Optional VTLN. 
 Run if vtln is set to true
+if [ $stage -le 11 ]; then
+    if [ $vtln = true ]; then
+        ./local/vtln.sh
+        ./local/chain/run_tdnnf.sh --nnet3_affix vtln --train_set train_vtln \
+            --test_sets test_vtln --gmm tri5 # --reporting_email $email
+    fi
+fi
+
+# Collect and report WER results for all models
+./local/sort_result.sh
diff --git a/egs/cmu_cslu_kids/s5/steps b/egs/cmu_cslu_kids/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/cmu_cslu_kids/s5/utils b/egs/cmu_cslu_kids/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/cmu_cslu_kids/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file