Changes from all commits (420 commits)
39958f7
Merge pull request #485 from naxingyu/add-chain-rm
danpovey Feb 5, 2016
f18638f
Finishing some things relating to kStrideEqualNumCols and flags for c…
danpovey Feb 5, 2016
7af7ba5
Merge branch 'chain' of github.com:/kaldi-asr/kaldi into chain
danpovey Feb 5, 2016
ae15da0
Merge remote-tracking branch 'origin/chain-general-component' into chain
danpovey Feb 5, 2016
1e4a97d
bug-fix for CUDA-related code of kStrideEqualNumCols
danpovey Feb 5, 2016
f63270a
Added a tool to compute the confidence interval of WER
Feb 5, 2016
d037050
bug-fixes regarding optimization code handling of kStrideEqualNumCols.
danpovey Feb 5, 2016
4d4cf4c
new scripts, and script changes, testing statistics-averaging
danpovey Feb 5, 2016
9bf7062
Adding steps/cleanup/find_bad_utts_nnet.sh, nnet2 version of steps/cl…
realill Feb 5, 2016
d26522d
Merge pull request #488 from realill/master
danpovey Feb 5, 2016
44db487
Merge pull request #460 from tomkocse/3dmaxpooling
danpovey Feb 6, 2016
bccf2bd
cosmatic change for swbd nnet2 recipe
naxingyu Feb 6, 2016
6844446
modified NonlinearComponent::Info().
pegahgh Feb 6, 2016
8ce0ae1
Merge pull request #489 from naxingyu/cosmatic-swbd-nnet2
danpovey Feb 6, 2016
b86aeb3
Merge pull request #482 from pegahgh/chain-normalization-with-log
danpovey Feb 6, 2016
8bf234d
bug-fix to backprop of NormalizeComponent; new script; improvement to…
danpovey Feb 6, 2016
48c07f3
preliminary work relating to statistics-pooling and statistics-extrac…
danpovey Jan 21, 2016
e3b834b
adding more code for statistics-extraction component.
danpovey Jan 23, 2016
b2101fb
Further progress toward finishing statistics-extraction-component and…
danpovey Jan 29, 2016
cf73f14
Testing code and various bug fixes relating to statistics-extraction…
danpovey Jan 30, 2016
3aabb61
chain branch: adding script-level support, and example script, for st…
danpovey Jan 31, 2016
82b5b95
bug-fixes and example-script changes relating to moving-average stati…
danpovey Feb 1, 2016
9d8133b
regarding statistics-averaging component: script bug fix regarding or…
danpovey Feb 3, 2016
7c3d324
Some partial work towards efficient convolution in time. May rewrite…
danpovey Feb 3, 2016
7a161bf
removing some draft code regarding convolution in time that I won't i…
danpovey Feb 4, 2016
c0bb04e
Finishing some things relating to kStrideEqualNumCols and flags for c…
danpovey Feb 5, 2016
59df267
bug-fix for CUDA-related code of kStrideEqualNumCols
danpovey Feb 5, 2016
faa18ae
bug-fixes regarding optimization code handling of kStrideEqualNumCols.
danpovey Feb 5, 2016
cc7be76
new scripts, and script changes, testing statistics-averaging
danpovey Feb 5, 2016
58252ae
modified cmd.sh again
Feb 6, 2016
f5307d1
Merge pull request #475 from keli78/test
danpovey Feb 6, 2016
ea0cbdc
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 6, 2016
2a51b02
refactoring, fixing and cleanup of component NormalizeComponent
danpovey Feb 6, 2016
6ad7a10
Adding ConstantFunctionComponent
danpovey Feb 7, 2016
5df3686
some script changes; testing improved randomization of frame-shift.
danpovey Feb 7, 2016
d916174
minor bug-fix to prevent compiler warning
danpovey Feb 7, 2016
0b86fd6
removing 'notes' file from chain branch
danpovey Feb 7, 2016
5df81f2
Merge branch 'chain-general-component-temp' into chain-temp5
danpovey Feb 7, 2016
5d7d87d
some script changes; testing improved randomization of frame-shift.
danpovey Feb 7, 2016
b2dbc17
Merge commit '5df3686' into chain-merged
danpovey Feb 7, 2016
1427e7a
merging changes in chain branch with some local changes I've been mak…
danpovey Feb 7, 2016
fe70a63
fix to bug in test code introduced by merge
danpovey Feb 7, 2016
692dd89
bug-fix to steps/nnet3/decode.sh
danpovey Feb 9, 2016
9291472
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 9, 2016
bbcc66e
added I/O interfaces for NnetComputation and ComputationRequest.
xiaohui-zhang Feb 8, 2016
d27c96b
fixed a bug
xiaohui-zhang Feb 9, 2016
a9b6513
fix to validate_lang.pl regarding disambiguation symbols, and associa…
danpovey Feb 10, 2016
0b09531
Cleaned the style and new functionality
Feb 10, 2016
46adeed
minor fixes
xiaohui-zhang Feb 10, 2016
81e8389
Merge pull request #491 from xiaohui-zhang/chain
danpovey Feb 11, 2016
751af96
fixed bugs in steps/nnet3/lstm/train.sh from PR #491.
xiaohui-zhang Feb 11, 2016
4a3d499
a minor fix.
xiaohui-zhang Feb 11, 2016
60df952
Merge pull request #493 from xiaohui-zhang/chain
danpovey Feb 11, 2016
117c075
nnet1: added the removal of frames with 0 confidence from mini-batches
KarelVesely84 Feb 11, 2016
760a400
Adding self-repair code for ReLUs (to fix over- or under-saturated Re…
danpovey Feb 10, 2016
9bcd17a
some code and script to support higher-frame-rate feature input for c…
danpovey Feb 10, 2016
07952a6
some extra tuning script for chain models in switchboard setup
danpovey Feb 11, 2016
304d4a1
Fix to self-repair code of nonlinear components
danpovey Feb 11, 2016
c6c0f34
Efficiency improvement for ReLU backpropagation; change in default tu…
danpovey Feb 11, 2016
d5cd1d6
script changes to support caching of computations when training chain…
danpovey Feb 11, 2016
14954a0
adding some new tuning scripts; some script-level fixes/updates.
danpovey Feb 12, 2016
dd10008
some tuning-script updates for chain models on switchboard
danpovey Feb 12, 2016
57cfb24
Change to StoreStats functions of tanh, sigmoid and relu components t…
danpovey Feb 12, 2016
b24f10f
chain branch: Adding some more recent results for swbd tuning
danpovey Feb 12, 2016
3651ca8
some cosmetic improvements to slurm.pl and to the fisher_callhome_spa…
danpovey Feb 12, 2016
032aa24
Minor fix regarding adaptation configs
hhadian Feb 14, 2016
8aa016a
If compiling with MSVS 2015, dont redefine snprintf
DeepHorizons Feb 14, 2016
5439624
Merge pull request #499 from DeepHorizons/snprintf_redefinition_fix
jtrmal Feb 14, 2016
6df8653
Merge pull request #498 from hhadian/patch-1
danpovey Feb 14, 2016
1fa0e18
adding a couple more swbd+chain tuning experiments
danpovey Feb 15, 2016
d093ec0
Kaldified source code and added a system comparison script
Feb 15, 2016
d5b8b23
Modified bash command to avoid wildcard expansion when using phones w…
Feb 15, 2016
15faa6c
Merge pull request #501 from speechmatics/prepare_lang_bash
danpovey Feb 15, 2016
2a862b4
swbd+chain: Add some new example scripts and an associated change in …
danpovey Feb 15, 2016
d25785d
small bug fix for fisher_swbd data prep
naxingyu Feb 16, 2016
d1e1e3d
Merge pull request #502 from naxingyu/bugfix-fisherswbd-swbd-prep
danpovey Feb 16, 2016
e06745d
adding some newer results for swbd+chain tuning; reverting skip-splic…
danpovey Feb 16, 2016
341e0f0
Changes to allow the large spanish word list to be downloaded if not …
noisychannel Feb 17, 2016
b7aa6b1
Small changes. Fixes #494
noisychannel Feb 17, 2016
6fd0056
Merge pull request #508 from noisychannel/fisher_spanish
danpovey Feb 17, 2016
577659a
fix swbd1 data prep duplicates
naxingyu Feb 17, 2016
4bce24c
Merge pull request #509 from naxingyu/swbd1-prep-dup-fix
danpovey Feb 17, 2016
92994e2
Handle multiple pronunciations in lexicon. Fixes #506
noisychannel Feb 17, 2016
a059643
updating 'cmd.sh' for BUT cluster in various recipes,
KarelVesely84 Feb 17, 2016
04cd902
fixing tidigits data preparation,
KarelVesely84 Feb 17, 2016
77395a9
Merge pull request #510 from noisychannel/fisher_spanish
danpovey Feb 17, 2016
2646cfb
chain+swbd experiments: tuning-experiment results
danpovey Feb 17, 2016
187fa16
chain branch: changing the self-repair code so that it should work we…
danpovey Feb 17, 2016
ea98c12
Merge pull request #511 from vesis84/tidigits_debug
danpovey Feb 18, 2016
7bc34fe
cosmetic change: fix 'score' to 'cost'
danpovey Feb 18, 2016
b3bbc03
modifying cmd.sh in example recipes to encourage the use of new-style…
danpovey Feb 18, 2016
dbb028f
clarifying configuration process for CUDA, and give prototype Makefil…
danpovey Feb 19, 2016
90af624
chain branch: bug-fix in self-repair code for sigmoid units
danpovey Feb 19, 2016
bcbfca0
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 20, 2016
e980e75
small cosmetic change to RM example script for chain models
danpovey Feb 20, 2016
bccc82e
Fix processing of phonemes when adding position dependency symbols:
OliverWalter Feb 21, 2016
5beecf2
Merge pull request #513 from OliverWalter/master
danpovey Feb 21, 2016
093feb6
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 22, 2016
4bd255f
Added plotting feature to the steps/nnet3/report scripts. Refactored …
vijayaditya Feb 22, 2016
d27c4b3
Merge pull request #514 from vijayaditya/report_refactor
danpovey Feb 22, 2016
29ed1f2
chain+swbd experiments: new tuning scripts, some changes to jesus con…
danpovey Feb 22, 2016
71b3009
removing unhelpful feature in make_jesus_configs.py
danpovey Feb 22, 2016
075e471
steps/nnet3/report: Added support for comparing multiple experiments
vijayaditya Feb 22, 2016
adb0c26
steps/nnet3/report : Adding latex report generation
vijayaditya Feb 22, 2016
745e775
bug fix to FeatureFbank::Dim()
danpovey Feb 22, 2016
e08fc17
Fix warnings on undefined values. $ACT is optional and there is no ne…
nshmyrev Feb 22, 2016
3db63ff
Merge pull request #518 from cmusphinx/master
danpovey Feb 22, 2016
f024c75
Merge pull request #516 from vijayaditya/report_refactor
danpovey Feb 22, 2016
db16052
Added train_rnn.py, nnet3_train_lib.py and modified steps/nnet3/lstm/…
vijayaditya Feb 7, 2016
05703ea
Merge pull request #490 from vijayaditya/new_train
danpovey Feb 22, 2016
3e73f67
some new tuning experiments on chain+swbd setup; add --xent-separate-…
danpovey Feb 22, 2016
5a9397e
minor bug fix to accommodate changes in PR #514
vijayaditya Feb 22, 2016
1de12bf
Merge pull request #519 from vijayaditya/report_refactor
danpovey Feb 22, 2016
12b3396
steps/nnet3/report : modified regex to accept negative mean values an…
vijayaditya Feb 23, 2016
32f4d94
Merge pull request #521 from vijayaditya/report_refactor
danpovey Feb 23, 2016
911dfed
Update make_configs.py
vimalmanohar Feb 23, 2016
30e3b42
Merge pull request #522 from vimalmanohar/patch-1
danpovey Feb 23, 2016
0b720b8
steps/nnet3/report : minor cosmetic fix
vijayaditya Feb 23, 2016
1eb8076
Merge pull request #523 from vijayaditya/report_refactor
danpovey Feb 23, 2016
005e64b
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 23, 2016
5b88f67
Some clarifying changes to comments
danpovey Feb 24, 2016
c524b8e
chain branch: merging some script changes/improvements from utils/ di…
danpovey Feb 24, 2016
cc0dcef
swbd+chain: miscellaneous new tuning scripts and bug-fixes
danpovey Feb 24, 2016
22e221a
chain+swbd expts: update to script; and minor fix to get_frame_shift.sh
danpovey Feb 24, 2016
1afcda8
minor correction in the way arguments are provided to python training…
vijayaditya Feb 25, 2016
ee4f4d4
Merge pull request #524 from vijayaditya/report_refactor
danpovey Feb 25, 2016
90769ef
fixes to debug_lexicon.sh script; fixes to TEDLIUM scripts (<unk> was…
danpovey Feb 25, 2016
05b94ce
fixes to debug_lexicon.sh script; fixes to TEDLIUM scripts (<unk> was…
danpovey Feb 25, 2016
dd0fe67
fix problem in tedlium recipe introduced by previous fix
danpovey Feb 25, 2016
c61d99a
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 25, 2016
09673ef
Documenting the behavior of compute-atwv correctly, as suggested by V…
jtrmal Feb 25, 2016
890cc05
Improved documentation for compute-atwv.cc
jtrmal Feb 25, 2016
bd87207
Improved documentation on treatment of the csv alignment
jtrmal Feb 25, 2016
cf17987
Merge pull request #527 from jtrmal/compute-atwv-fix2
danpovey Feb 25, 2016
7cc032f
fix to minor bug in create_split_dir.pl (would affect no recipes)
danpovey Feb 26, 2016
fe49d90
Added steps/nnet3/train_dnn.py and made changes to swbd/tdnn recipe t…
vijayaditya Feb 25, 2016
1d3d854
steps/nnet3/train_*.py modified the name of the option command
vijayaditya Feb 26, 2016
415d5c7
Merge pull request #528 from vijayaditya/new_train
danpovey Feb 26, 2016
dce72c1
chain+swbd experiments: changes to tuning scripts
danpovey Feb 26, 2016
e190c61
Merge remote-tracking branch 'upstream/master' into chain
danpovey Feb 26, 2016
ae8e69b
nnet1: bugfix in NN training script (seeding of nnet-inititalize)
KarelVesely84 Feb 27, 2016
2f66de7
nnet3: Added self-repair option for LSTMs, modified swbd lstm recipe …
vijayaditya Feb 27, 2016
b9b728a
Merge pull request #529 from vijayaditya/new_train2
danpovey Feb 27, 2016
f70a416
Added python scripts for chain model training
vijayaditya Feb 25, 2016
950028c
minor modification to python chain script
vijayaditya Feb 27, 2016
f8e9ba2
Merge pull request #525 from vijayaditya/chain_new_train2
danpovey Feb 27, 2016
f463444
added compute-filter.cc binary, which computes LP filters per speaker.
pegahgh Feb 28, 2016
190c015
nnet3/tdnn : reduced reliance on numpy and scipy
vijayaditya Feb 28, 2016
89bef1e
Merge pull request #531 from vijayaditya/remove_numpy_req
danpovey Feb 28, 2016
8f9db59
bug-fix to use self-repair option in LSTMs
vijayaditya Feb 28, 2016
29481e8
Changes from master to enable automerge
vijayaditya Feb 28, 2016
70801e0
Merge pull request #532 from vijayaditya/remove_numpy_req
danpovey Feb 28, 2016
6f2662a
Merge pull request #533 from vijayaditya/chain_new_train2
danpovey Feb 28, 2016
0466014
chain branch: merging changes from upstream/master
danpovey Feb 28, 2016
7e06792
Merge branch 'chain' of https://github.com/kaldi-asr/kaldi into chain
danpovey Feb 28, 2016
a3694a4
Add chain library to nnet3bin dependencies
akreal Feb 28, 2016
1ba06bf
Merge pull request #534 from akreal/master
danpovey Feb 28, 2016
8d09b03
swbd/local/chain/: lstm recipe parameter modification. Using momentum…
vijayaditya Feb 29, 2016
b2ca6f6
Fix <unk> bug in TEDLIUM Cantab Research language model.
galv Feb 28, 2016
84ce638
add utils/perturb_data_signal.sh
tomkocse Feb 29, 2016
cbfa751
added --spk2utt option to compute-filter to consider computing speake…
pegahgh Feb 29, 2016
437849b
Merge pull request #537 from galv/tedlium-lm-fix
danpovey Feb 29, 2016
92a63f6
Merge pull request #535 from vijayaditya/chain_new_train2
danpovey Feb 29, 2016
a540543
modified compute-filter.cc and fixed some issues.
pegahgh Mar 1, 2016
737102f
Merge remote-tracking branch 'upstream/master' into chain
danpovey Mar 1, 2016
61dc4bb
Merge pull request #4 from tomkocse/chain-signal-perturb
pegahgh Mar 2, 2016
dcae01c
added apply-filter and fixed compute-filter.
pegahgh Mar 2, 2016
78ab2bf
added utils/perturb_data_signal.sh written by Tom Ko.
pegahgh Mar 2, 2016
eb615eb
Merge branch 'chain-signal-perturb' of https://github.com/pegahgh/kal…
pegahgh Mar 2, 2016
cf93278
minor bug fix in egs/*/*/local/*/run_lstm.sh scripts
vijayaditya Mar 2, 2016
9a3a4f1
Merge pull request #541 from vijayaditya/remove_numpy_req
danpovey Mar 2, 2016
81cfaff
master: some fixes to chain scripts to fix problems encountered by Ilya
danpovey Mar 2, 2016
9fc8834
Merge remote-tracking branch 'upstream/master' into chain
danpovey Mar 2, 2016
6092808
chain branch: updates to chain+swbd tuning scripts; cosmetic change t…
danpovey Mar 2, 2016
54a90f6
some minor cosmetic improvements to TEDLIUM recipe; improvements to c…
danpovey Mar 2, 2016
d77acd8
bug fix in nnet3 lstms
vijayaditya Mar 3, 2016
775fe3f
Merge pull request #542 from vijayaditya/remove_numpy_req
danpovey Mar 3, 2016
7d72974
small fixes/improvements to debug_lexicon.sh
danpovey Mar 3, 2016
82c5cc5
adding program lattice-arc-post
danpovey Mar 3, 2016
ebb239b
chain branch: fixes to lattice-arc-post.cc
danpovey Mar 3, 2016
2cfc989
Raw nnet3 training
vimalmanohar Mar 2, 2016
1148b4e
Additional information about Intel MKL and NVIDIA CUDA
ceefour Mar 3, 2016
771663f
Use curl instead of wget
ceefour Mar 3, 2016
03fbab2
Merge pull request #545 from ceefour/patch-3
jtrmal Mar 3, 2016
915d676
Fix directory name
ceefour Mar 3, 2016
6d58ce3
Merge pull request #544 from ceefour/patch-2
jtrmal Mar 3, 2016
c141c80
Merge pull request #546 from ceefour/patch-4
jtrmal Mar 3, 2016
94dcf92
add prefix argument in utils/perturb_data_signal.sh.
tomkocse Mar 3, 2016
fb16a92
Merge pull request #5 from tomkocse/chain-signal-perturb
pegahgh Mar 3, 2016
19c0615
raw_nnet: Some fixes
vimalmanohar Mar 3, 2016
2caf161
modify the way of print out
tomkocse Mar 3, 2016
77318db
use random filter if can't find speaker's own filter
tomkocse Mar 3, 2016
dd24876
bug fix in lattice-arc-post: apply acoustic and language model scale.
danpovey Mar 3, 2016
e250573
raw_nnet: Moved config generation to run script
vimalmanohar Mar 3, 2016
8d14382
raw_nnet: Minor fix
vimalmanohar Mar 3, 2016
cdcd92b
raw_nnet: Removed some unnecessary checks
vimalmanohar Mar 3, 2016
4a6daa1
raw_nnet: Added an option for configs_dir
vimalmanohar Mar 3, 2016
dc54009
minor bug-fix in steps/nnet3/chain/train_tdnn.sh (won't affect results)
danpovey Mar 3, 2016
e5a66f5
Merge remote-tracking branch 'upstream/master' into chain
danpovey Mar 3, 2016
babbb47
Merge pull request #6 from tomkocse/chain-signal-perturb
pegahgh Mar 3, 2016
6c59f5d
raw_nnet: Removed some unnecessary stuff from steps/nnet3/tdnn/train_…
vimalmanohar Mar 4, 2016
2ebfcf5
raw_nnet: Minor cosmetic change
vimalmanohar Mar 4, 2016
fbedf1d
raw_nnet: Added autoencoder recipe in aspire
vimalmanohar Mar 4, 2016
e0b3705
Merge pull request #539 from vimalmanohar/raw_nnet
danpovey Mar 4, 2016
4ecee3f
added simple TDNN comparison run_tdnn_6v.sh for jesus models run_tdnn…
vijayaditya Mar 4, 2016
6708551
Merge pull request #548 from vijayaditya/tdnn_chain
danpovey Mar 4, 2016
29e0aaf
minor fix to chain/train.py
vijayaditya Mar 4, 2016
bd884df
Merge pull request #549 from vijayaditya/tdnn_chain
danpovey Mar 4, 2016
596b382
use recording id to search the filter
tomkocse Mar 4, 2016
d2591c0
Merge pull request #7 from tomkocse/chain-signal-perturb
pegahgh Mar 4, 2016
e3410bc
Renamed, tweaked and added options to bootci WER
Mar 4, 2016
2590476
Merge remote-tracking branch 'upstream/master' into chain
danpovey Mar 4, 2016
3191fcd
chain branch: various new tuning script; some minor code improvements
danpovey Mar 4, 2016
3432509
new tuning scripts for swbd+chain; cosmetic change to chain/train.py
danpovey Mar 4, 2016
b706a96
Merge remote-tracking branch 'upstream/chain' into master (periodic s…
danpovey Mar 4, 2016
c3c4ccd
Merge pull request #487 from nsmartinez/compute-confidence-interval-wer
jtrmal Mar 4, 2016
9a0d071
some swbd+chain tuning script updates; minor cosmetic fixes to chain …
danpovey Mar 5, 2016
7219ee0
some fixes to nnet3 and chain python scripts; new tuning script.
danpovey Mar 5, 2016
d9266e7
Merge remote-tracking branch 'upstream/master' into chain
danpovey Mar 5, 2016
c36bd73
removed bugs introduced due automerge with master
vijayaditya Mar 5, 2016
7e8259c
Merge pull request #557 from vijayaditya/chain_remove
danpovey Mar 5, 2016
8156912
chain model training: fix to steps/nnet3/tdnn/make_configs.py to appl…
danpovey Mar 5, 2016
5f47da7
swbd : Added lstm chain recipe based on tdnn_6h
vijayaditya Mar 6, 2016
8c6d814
Merge pull request #561 from vijayaditya/chain_remove
danpovey Mar 6, 2016
1a7613c
chain+swbd: new tuning scripts
danpovey Mar 6, 2016
2a91756
chain scripts: fixing script-level default of right-tolerancde from 1…
danpovey Mar 6, 2016
7017322
changes to interfaces for how learning-rates are set, for greater cla…
danpovey Mar 6, 2016
e0e2065
sequence: Minor changes required for sequence training
vimalmanohar Mar 5, 2016
2c9bff8
Merge pull request #560 from vimalmanohar/sequence_changes
danpovey Mar 7, 2016
c928063
chain+swbd: some more recent tuning scripts
danpovey Mar 7, 2016
dd7cb32
swbd : Added blstm + chain recipe, results pending
vijayaditya Mar 7, 2016
76af6b6
Merge pull request #564 from vijayaditya/chain_remove
danpovey Mar 7, 2016
d75764e
sequence: nnet3 discriminative code added
vimalmanohar Jan 31, 2016
394ae72
add utils/perturb_data_signal_v2.sh
tomkocse Mar 8, 2016
8a15945
Merge pull request #8 from tomkocse/chain-signal-perturb
pegahgh Mar 8, 2016
d6ba876
exposed --frame-shift option at the script level in get_ctm.sh and ge…
danpovey Mar 8, 2016
7d66712
sequence: Restructured code to reduce some configs and rename some cl…
vimalmanohar Mar 9, 2016
4e3267a
Merge pull request #563 from vimalmanohar/sequence_changes
danpovey Mar 9, 2016
c1dc87c
fixed some issue with compute-filter and modified scripts.
pegahgh Mar 9, 2016
6932765
swbd+chain+lstm:updated results after Dan's fix to right-tolerance de…
vijayaditya Mar 9, 2016
ff2f2d0
Merge pull request #570 from vijayaditya/chain_remove
danpovey Mar 9, 2016
8d5c8b4
change to nbest-to-ctm.cc to automatically use higher precision if pr…
danpovey Mar 9, 2016
d5f4f8a
Add options for score.sh to rescoring script.
galv Mar 9, 2016
d930497
Merge pull request #573 from galv/bug-fix/add-rescore-score-options
danpovey Mar 9, 2016
c418561
swbd+chain+blstm: Added results
vijayaditya Mar 10, 2016
0ff500c
Merge pull request #575 from vijayaditya/chain_remove
danpovey Mar 10, 2016
4e48a71
fixed some bugs and added recent scripts.
pegahgh Mar 10, 2016
8c7b789
added changes to signal.*
pegahgh Mar 10, 2016
5de9b1d
Merge branch 'chain' of https://github.com/kaldi-asr/kaldi into chain…
pegahgh Mar 10, 2016
cf35c6c
added some changes
pegahgh Apr 5, 2016
2 changes: 2 additions & 0 deletions .gitattributes
@@ -15,4 +15,6 @@ windows/INSTALL* eol=native
windows/NewGuidCmd.exe.config text eol=crlf
windows/NewGuidCmd.exe binary

# Prevent git changing CR-LF to LF when archiving (patch requires CR-LF on Windows).
**/*.patch -text
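
One way to confirm the new rule takes effect is git check-attr; the .patch path below is hypothetical, used only for illustration. Because the pattern sets -text, the attribute reports as unset:

$ git check-attr text -- windows/example.patch
windows/example.patch: text: unset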

3 changes: 2 additions & 1 deletion .gitignore
@@ -6,11 +6,12 @@
!/src/*/Makefile
!/src/*/README

# Compiled Object files
# Compiled Object files and python files
*.slo
*.lo
*.o
*.obj
*.pyc

# Compiled Dynamic libraries
*.so
49 changes: 25 additions & 24 deletions egs/ami/s5/cmd.sh
@@ -1,37 +1,38 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 1G"
export decode_cmd="queue.pl --mem 2G"
# the use of cuda_cmd is deprecated but it is sometimes still used in nnet1
# scripts.
export cuda_cmd="queue.pl --gpu 1 --mem 20G"

# the rest of this file is present for historical reasons.
# In general it's best to rely on conf/queue.conf for cluster-specific
# configuration.

# On Eddie use:
#export train_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=08:00:00"
#export decode_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
#export highmem_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
#export scoring_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=00:20:00"

# JSALT2015 workshop, cluster AWS-EC2, (setup from Vijay)
export train_cmd="queue.pl -l arch=*64* --mem 1G"
export decode_cmd="queue.pl -l arch=*64* --mem 2G"
export highmem_cmd="queue.pl -l arch=*64* --mem 4G"
export scoring_cmd="queue.pl -l arch=*64*"
export cuda_cmd="queue.pl --gpu 1 -l mem_free=20G,ram_free=20G"
export cntk_decode_cmd="queue.pl -l arch=*64* --mem 1G -pe smp 2"

# To run locally, use:
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export highmem_cmd=run.pl
#export cuda_cmd=run.pl

if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
# BUT cluster:
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
gpu_queue="long.q@@gpu"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export train_cmd="queue.pl -q $queue -l ram_free=1.5G,mem_free=1.5G,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2.5G,mem_free=2.5G,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
fi
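
The comment block added above points to conf/queue.conf for cluster-specific configuration. As a rough sketch of what such a file can look like, modeled on the 'default_config' string inside utils/queue.pl (the qsub flags and queue names are assumptions to adapt to your grid, not part of this PR):

# conf/queue.conf -- example only; adjust qsub flags and queue names for your grid.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0           # mem=0 adds no extra qsub options
option num_threads=* -pe smp $0
option num_threads=1   # single-threaded jobs add no extra options
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q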

4 changes: 2 additions & 2 deletions egs/ami/s5/local/nnet3/run_lstm.sh
@@ -18,7 +18,7 @@
stage=0
train_stage=-10
mic=ihm
use_ihm_ali=false
use_ihm_ali=false
use_sat_alignments=false # if true, tri4a alignments are used
# by default GMM-HMM systems are not built to this stage
# in SDM and MDM systems. So run the tri4a stage if you
@@ -66,7 +66,7 @@ decode_iter=

echo "$0 $@" # Print the command line for logging

. cmd.sh
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

26 changes: 13 additions & 13 deletions egs/ami/s5/run_ihm.sh
@@ -10,13 +10,13 @@ mic=ihm
stage=0
. utils/parse_options.sh

# Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :
# Set bash to 'debug' mode, it prints the commands (option '-x') and exits on :
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline',
set -euxo pipefail

# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
fit.vutbr.cz) AMI_DIR=/mnt/scratch05/iveselyk/KALDI_AMI_WAV ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
@@ -86,7 +86,7 @@ if [ $stage -le 5 ]; then
data/$mic/train data/lang exp/$mic/tri2a exp/$mic/tri2_ali
# Decode,
graph_dir=exp/$mic/tri2a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
$cmd --mem 4G $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri2a $graph_dir
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri2a/decode_dev_${LM}
@@ -104,26 +104,26 @@ if [ $stage -le 6 ]; then
data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_ali
# Decode,
graph_dir=exp/$mic/tri3a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
$cmd --mem 4G $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri3a $graph_dir
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri3a/decode_dev_${LM}
$graph_dir data/$mic/dev exp/$mic/tri3a/decode_dev_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri3a/decode_eval_${LM}
fi
fi

if [ $stage -le 7 ]; then
# Train tri4a, which is LDA+MLLT+SAT,
steps/train_sat.sh --cmd "$train_cmd" \
5000 80000 data/$mic/train data/lang exp/$mic/tri3a_ali exp/$mic/tri4a
# Decode,
# Decode,
graph_dir=exp/$mic/tri4a/graph_${LM}
$highmem_cmd $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${LM} exp/$mic/tri4a $graph_dir
steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/dev exp/$mic/tri4a/decode_dev_${LM}
$graph_dir data/$mic/dev exp/$mic/tri4a/decode_dev_${LM}
steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri4a/decode_eval_${LM}
$graph_dir data/$mic/eval exp/$mic/tri4a/decode_eval_${LM}
fi

nj_mmi=80
@@ -160,11 +160,11 @@ if [ $stage -le 11 ]; then
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_dev_${i}.mdl_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_dev_${LM} --iter $i \
$graph_dir data/$mic/dev $decode_dir
$graph_dir data/$mic/dev $decode_dir
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_eval_${i}.mdl_${LM}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_eval_${LM} --iter $i \
$graph_dir data/$mic/eval $decode_dir
$graph_dir data/$mic/eval $decode_dir
done
fi

@@ -181,7 +181,7 @@ if [ $stage -le 13 ]; then
--hidden-dim 950 \
--splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
--use-sat-alignments true

local/online/run_nnet2_ms_sp_disc.sh \
--mic $mic \
--gmm-dir exp/$mic/tri4a \
88 changes: 88 additions & 0 deletions egs/aspire/s5/local/nnet3/run_autoencoder.sh
@@ -0,0 +1,88 @@
#!/bin/bash

# this is an example to show a "tdnn" system in raw nnet configuration
# i.e. without a transition model

. cmd.sh


# At this script level we don't support running without a GPU, as it would be painfully slow.
# If you want to run without a GPU you'd have to call train_raw_nnet.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

stage=0
affix=
train_stage=-10
common_egs_dir=
num_data_reps=10

remove_egs=true

. cmd.sh
. ./path.sh
. ./utils/parse_options.sh


if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

dir=exp/nnet3/tdnn_raw
dir=$dir${affix:+_$affix}

clean_data_dir=data/train
data_dir=data/train_rvb
targets_scp=$dir/targets.scp

mkdir -p $dir

# Create copies of clean feats with prefix "rev$x_" to match utterance names of
# the noisy feats
for x in `seq 1 $num_data_reps`; do
  awk -v x=$x '{print "rev"x"_"$0}' $clean_data_dir/feats.scp
done | sort -k1,1 > $targets_scp

if [ $stage -le 9 ]; then
echo "$0: creating neural net configs";

num_targets=`feat-to-dim scp:$targets_scp - 2>/dev/null` || exit 1

# create the config files for nnet initialization
python steps/nnet3/tdnn/make_configs.py \
--splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -7,2 0" \
--feat-dir ${data_dir} \
--relu-dim=1024 \
--add-lda=false \
--objective-type=quadratic \
--add-final-sigmoid=false \
--include-log-softmax=false \
--use-presoftmax-prior-scale=false \
--num-targets=$num_targets \
$dir/configs || exit 1;
fi

if [ $stage -le 10 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/tdnn/train_raw_nnet.sh --stage $train_stage \
--cmd "$decode_cmd" \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-epochs 2 \
--num-jobs-initial 3 \
--num-jobs-final 16 \
--initial-effective-lrate 0.0017 \
--final-effective-lrate 0.00017 \
--egs-dir "$common_egs_dir" \
--remove-egs $remove_egs \
--use-gpu true \
--dense-targets true \
${data_dir} $targets_scp $dir || exit 1
fi
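
A typical invocation from the egs/aspire/s5 directory, assuming data/train and the reverberated copy data/train_rvb already exist, would be along these lines (the --affix value is illustrative; the options map onto the variables defined at the top of the script via utils/parse_options.sh):

$ local/nnet3/run_autoencoder.sh --stage 0 --affix ae1 --num-data-reps 10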

43 changes: 16 additions & 27 deletions egs/aurora4/s5/cmd.sh
@@ -1,29 +1,18 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64 --mem 2G"
export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
export train_cmd="queue.pl --mem 4G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
# the use of cuda_cmd is deprecated but it's still used in some example scripts
# here.
export cuda_cmd="queue.pl --gpu 1"


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
44 changes: 15 additions & 29 deletions egs/babel/s5/cmd.sh
@@ -1,29 +1,15 @@
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine.

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"

#export cuda_cmd="..."


#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"

#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"