diff --git a/egs/aishell2/s5/local/word_segmentation.py b/egs/aishell2/s5/local/word_segmentation.py index eb7bb648970..4ce55a2003e 100644 --- a/egs/aishell2/s5/local/word_segmentation.py +++ b/egs/aishell2/s5/local/word_segmentation.py @@ -4,6 +4,7 @@ # 2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU) # Apache 2.0 +from __future__ import print_function import sys import jieba reload(sys) diff --git a/egs/ami/s5/local/sort_bad_utts.py b/egs/ami/s5/local/sort_bad_utts.py index f84fcb12608..baabdc73508 100644 --- a/egs/ami/s5/local/sort_bad_utts.py +++ b/egs/ami/s5/local/sort_bad_utts.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import print_function import sys import argparse import logging @@ -38,10 +39,10 @@ def GetSortedWers(utt_info_file): utt_wer_sorted = sorted(utt_wer, key = lambda k : k[1]) try: import numpy as np - bins = range(0,105,5) + bins = list(range(0,105,5)) bins.append(sys.float_info.max) - hist, bin_edges = np.histogram(map(lambda x: x[1], utt_wer_sorted), + hist, bin_edges = np.histogram([x[1] for x in utt_wer_sorted], bins = bins) num_utts = len(utt_wer) string = '' diff --git a/egs/ami/s5/local/tfrnnlm/run_lstm.sh b/egs/ami/s5/local/tfrnnlm/run_lstm.sh index a298590a31d..d68fadb10f3 100755 --- a/egs/ami/s5/local/tfrnnlm/run_lstm.sh +++ b/egs/ami/s5/local/tfrnnlm/run_lstm.sh @@ -39,7 +39,7 @@ if [ $stage -le 3 ]; then decode_dir=${basedir}/decode_${decode_set} # Lattice rescoring - steps/lmrescore_rnnlm_lat.sh \ + steps/tfrnnlm/lmrescore_rnnlm_lat.sh \ --cmd "$tfrnnlm_cmd --mem 16G" \ --rnnlm-ver tensorflow --weight $weight --max-ngram-order $ngram_order \ data/lang_$LM $dir \ diff --git a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh index 15d237b0e12..7a95f38ba1e 100755 --- a/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh +++ b/egs/ami/s5/local/tfrnnlm/run_vanilla_rnnlm.sh @@ -39,7 +39,7 @@ if [ $stage -le 3 ]; then decode_dir=${basedir}/decode_${decode_set} # Lattice rescoring - steps/lmrescore_rnnlm_lat.sh \ + steps/tfrnnlm/lmrescore_rnnlm_lat.sh \ --cmd "$tfrnnlm_cmd --mem 16G" \ --rnnlm-ver tensorflow --weight $weight --max-ngram-order $ngram_order \ data/lang_$LM $dir \ diff --git a/egs/an4/s5/local/data_prep.py b/egs/an4/s5/local/data_prep.py index 24cb9bffb07..9d8083f3b60 100644 --- a/egs/an4/s5/local/data_prep.py +++ b/egs/an4/s5/local/data_prep.py @@ -15,6 +15,7 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. +from __future__ import print_function import os import re import sys diff --git a/egs/an4/s5/local/lexicon_prep.py b/egs/an4/s5/local/lexicon_prep.py index 8d451daf869..3584fa86dfb 100644 --- a/egs/an4/s5/local/lexicon_prep.py +++ b/egs/an4/s5/local/lexicon_prep.py @@ -15,6 +15,7 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. 
+from __future__ import print_function import os import re import sys diff --git a/egs/aspire/s5/local/multi_condition/create_uniform_segments.py b/egs/aspire/s5/local/multi_condition/create_uniform_segments.py index e7baafc028c..010811490ef 100755 --- a/egs/aspire/s5/local/multi_condition/create_uniform_segments.py +++ b/egs/aspire/s5/local/multi_condition/create_uniform_segments.py @@ -4,13 +4,14 @@ # creates a segments file in the provided data directory # into uniform segments with specified window and overlap +from __future__ import division import imp, sys, argparse, os, math, subprocess min_segment_length = 10 # in seconds def segment(total_length, window_length, overlap = 0): increment = window_length - overlap num_windows = int(math.ceil(float(total_length)/increment)) - segments = map(lambda x: (x * increment, min( total_length, (x * increment) + window_length)), range(0, num_windows)) + segments = [(x * increment, min( total_length, (x * increment) + window_length)) for x in range(0, num_windows)] if segments[-1][1] - segments[-1][0] < min_segment_length: segments[-2] = (segments[-2][0], segments[-1][1]) segments.pop() @@ -53,7 +54,7 @@ def prepare_segments_file(kaldi_data_dir, window_length, overlap): parser = argparse.ArgumentParser() parser.add_argument('--window-length', type = float, default = 30.0, help = 'length of the window used to cut the segment') parser.add_argument('--overlap', type = float, default = 5.0, help = 'overlap of neighboring windows') - parser.add_argument('data_dir', type=str, help='directory such as data/train') + parser.add_argument('data_dir', help='directory such as data/train') params = parser.parse_args() diff --git a/egs/aspire/s5/local/multi_condition/fill_missing_recordings.py b/egs/aspire/s5/local/multi_condition/fill_missing_recordings.py index e249e54e5f6..2b4bcddda69 100755 --- a/egs/aspire/s5/local/multi_condition/fill_missing_recordings.py +++ b/egs/aspire/s5/local/multi_condition/fill_missing_recordings.py @@ -38,14 +38,14 @@ def fill_ctm(input_ctm_file, output_ctm_file, recording_names): sys.stderr.write(str(" ".join(sys.argv))) parser = argparse.ArgumentParser(usage) - parser.add_argument('input_ctm_file', type=str, help='ctm file for the recordings') - parser.add_argument('output_ctm_file', type=str, help='ctm file for the recordings') - parser.add_argument('recording_name_file', type=str, help='file with names of the recordings') + parser.add_argument('input_ctm_file', help='ctm file for the recordings') + parser.add_argument('output_ctm_file', help='ctm file for the recordings') + parser.add_argument('recording_name_file', help='file with names of the recordings') params = parser.parse_args() try: - file_names = map(lambda x: x.strip(), open("{0}".format(params.recording_name_file)).readlines()) + file_names = [x.strip() for x in open("{0}".format(params.recording_name_file)).readlines()] except IOError: raise Exception("Expected to find {0}".format(params.recording_name_file)) diff --git a/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py b/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py index cc06f58616a..1f06d3e7c3b 100755 --- a/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py +++ b/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py @@ -3,6 +3,7 @@ # script to generate the file_patterns of the AIR database # see load_air.m file in AIR db to understand the naming convention +from __future__ import print_function import sys, glob, re, os.path air_dir = sys.argv[1] @@ -45,4 +46,4 @@ 
file_patterns.append(file_pattern+" "+output_file_name) file_patterns = list(set(file_patterns)) file_patterns.sort() -print "\n".join(file_patterns) +print("\n".join(file_patterns)) diff --git a/egs/aspire/s5/local/multi_condition/normalize_wavs.py b/egs/aspire/s5/local/multi_condition/normalize_wavs.py index dabf420d9f8..6e67d2113c1 100755 --- a/egs/aspire/s5/local/multi_condition/normalize_wavs.py +++ b/egs/aspire/s5/local/multi_condition/normalize_wavs.py @@ -3,6 +3,8 @@ # normalizes the wave files provided in input file list with a common scaling factor # the common scaling factor is computed to 1/\sqrt(1/(total_samples) * \sum_i{\sum_j x_i(j)^2}) where total_samples is sum of all samples of all wavefiles. If the data is multi-channel then each channel is treated as a seperate wave files +from __future__ import division +from __future__ import print_function import argparse, scipy.io.wavfile, warnings, numpy as np, math def get_normalization_coefficient(file_list, is_rir, additional_scaling): @@ -29,7 +31,7 @@ def get_normalization_coefficient(file_list, is_rir, additional_scaling): assert(rate == sampling_rate) else: sampling_rate = rate - data = data / dtype_max_value + data = data/dtype_max_value if is_rir: # just count the energy of the direct impulse response # this is treated as energy of signal from 0.001 seconds before impulse @@ -55,8 +57,8 @@ def get_normalization_coefficient(file_list, is_rir, additional_scaling): except IOError: warnings.warn("Did not find the file {0}.".format(file)) assert(total_samples > 0) - scaling_coefficient = np.sqrt(total_samples / total_energy) - print "Scaling coefficient is {0}.".format(scaling_coefficient) + scaling_coefficient = np.sqrt(total_samples/total_energy) + print("Scaling coefficient is {0}.".format(scaling_coefficient)) if math.isnan(scaling_coefficient): raise Exception(" Nan encountered while computing scaling coefficient. 
This is mostly due to numerical overflow") return scaling_coefficient diff --git a/egs/aspire/s5/local/multi_condition/read_rir.py b/egs/aspire/s5/local/multi_condition/read_rir.py index a2e1c2052e2..04898bda760 100755 --- a/egs/aspire/s5/local/multi_condition/read_rir.py +++ b/egs/aspire/s5/local/multi_condition/read_rir.py @@ -29,9 +29,9 @@ def usage(): #sys.stderr.write(" ".join(sys.argv)+"\n") parser = argparse.ArgumentParser(usage()) parser.add_argument('--output-sampling-rate', type = int, default = 8000, help = 'sampling rate of the output') - parser.add_argument('type', type = str, default = None, help = 'database type', choices = ['air']) - parser.add_argument('input', type = str, default = None, help = 'directory containing the multi-channel data for a particular recording, or file name or file-regex-pattern') - parser.add_argument('output_filename', type = str, default = None, help = 'output filename (if "-" then output is written to output pipe)') + parser.add_argument('type', default = None, help = 'database type', choices = ['air']) + parser.add_argument('input', default = None, help = 'directory containing the multi-channel data for a particular recording, or file name or file-regex-pattern') + parser.add_argument('output_filename', default = None, help = 'output filename (if "-" then output is written to output pipe)') params = parser.parse_args() if params.output_filename == "-": diff --git a/egs/aspire/s5/local/multi_condition/reverberate_wavs.py b/egs/aspire/s5/local/multi_condition/reverberate_wavs.py index 998a3ed5e74..f43e4a2f894 100755 --- a/egs/aspire/s5/local/multi_condition/reverberate_wavs.py +++ b/egs/aspire/s5/local/multi_condition/reverberate_wavs.py @@ -4,18 +4,20 @@ # script to generate multicondition training data / dev data / test data import argparse, glob, math, os, random, scipy.io.wavfile, sys -class list_cyclic_iterator: +class list_cyclic_iterator(object): def __init__(self, list, random_seed = 0): self.list_index = 0 self.list = list random.seed(random_seed) random.shuffle(self.list) - def next(self): + def __next__(self): item = self.list[self.list_index] self.list_index = (self.list_index + 1) % len(self.list) return item + next = __next__ # for Python 2 + def return_nonempty_lines(lines): new_lines = [] for line in lines: @@ -71,15 +73,15 @@ def return_nonempty_lines(lines): for i in range(len(wav_files)): wav_file = " ".join(wav_files[i].split()[1:]) output_wav_file = wav_out_files[i] - impulse_file = impulses.next() + impulse_file = next(impulses) noise_file = '' snr = '' found_impulse = False if add_noise: - for i in xrange(len(impulse_noise_index)): + for i in range(len(impulse_noise_index)): if impulse_file in impulse_noise_index[i][0]: - noise_file = impulse_noise_index[i][1].next() - snr = snrs.next() + noise_file = next(impulse_noise_index[i][1]) + snr = next(snrs) assert(len(wav_file.strip()) > 0) assert(len(impulse_file.strip()) > 0) assert(len(noise_file.strip()) > 0) diff --git a/egs/babel/s5b/local/lonestar.py b/egs/babel/s5b/local/lonestar.py index e1594e55ada..809f99b22cf 100755 --- a/egs/babel/s5b/local/lonestar.py +++ b/egs/babel/s5b/local/lonestar.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from __future__ import print_function from pylauncher import * import pylauncher import sys @@ -39,7 +40,7 @@ def KaldiLauncher(lo, **kwargs): logfiles = list() commands = list() - for q in xrange(lo.jobstart, lo.jobend+1): + for q in range(lo.jobstart, lo.jobend+1): s = "bash " + lo.queue_scriptfile + " " + str(q) commands.append(s) @@ -74,7 
+75,7 @@ def KaldiLauncher(lo, **kwargs): time.sleep(delay); lines=tail(10, logfile) - with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines) + with_status=[x for x in lines if re.search(r'with status (\d+)', x)] if len(with_status) == 0: sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem" @@ -98,7 +99,7 @@ def KaldiLauncher(lo, **kwargs): sys.exit(-1); #Remove service files. Be careful not to remove something that might be needed in problem diagnostics - for i in xrange(len(commands)): + for i in range(len(commands)): out_file=os.path.join(qdir, ce.outstring+str(i)) #First, let's wait on files missing (it might be that those are missing @@ -149,7 +150,7 @@ def KaldiLauncher(lo, **kwargs): #print job.final_report() -class LauncherOpts: +class LauncherOpts(object): def __init__(self): self.sync=0 self.nof_threads = 1 @@ -199,7 +200,7 @@ def CmdLineParser(argv): jobend=int(m.group(2)) argv.pop(0) elif re.match("^.+=.*:.*$", argv[0]): - print >> sys.stderr, "warning: suspicious JOB argument " + argv[0]; + print("warning: suspicious JOB argument " + argv[0], file=sys.stderr); if jobstart > jobend: sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n") @@ -238,8 +239,8 @@ def setup_paths_and_vars(opts): cwd = os.getcwd() if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend): - print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \ - "but you are putting the output into just one log file (" + opts.logfile + ")"; + print("lonestar.py: you are trying to run a parallel job" \ + "but you are putting the output into just one log file (" + opts.logfile + ")", file=sys.stderr); sys.exit(1) if not os.path.isabs(opts.logfile): @@ -261,8 +262,8 @@ def setup_paths_and_vars(opts): taskname=os.path.basename(queue_logfile) taskname = taskname.replace(".log", ""); if taskname == "": - print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \ - "that leads to an empty task name ("+logfile + ")"; + print("lonestar.py: you specified the log file name in such form " \ + "that leads to an empty task name ("+logfile + ")", file=sys.stderr); sys.exit(1) if not os.path.isabs(queue_logfile): diff --git a/egs/babel/s5b/local/resegment/segmentation.py b/egs/babel/s5b/local/resegment/segmentation.py index 7c5c8665a16..aed65a4ca14 100755 --- a/egs/babel/s5b/local/resegment/segmentation.py +++ b/egs/babel/s5b/local/resegment/segmentation.py @@ -3,6 +3,7 @@ # Copyright 2014 Vimal Manohar # Apache 2.0 +from __future__ import division import os, glob, argparse, sys, re, time from argparse import ArgumentParser @@ -19,12 +20,12 @@ def mean(l): if len(l) > 0: - return float(sum(l)) / len(l) + return (float(sum(l))/len(l)) return 0 # Analysis class # Stores statistics like the confusion matrix, length of the segments etc. 
-class Analysis: +class Analysis(object): def __init__(self, file_id, frame_shift, prefix): self.confusion_matrix = [0] * 9 self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ] @@ -274,8 +275,8 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): i = len(this_file) category = splits[6] word = splits[5] - start_time = int(float(splits[3])/frame_shift + 0.5) - duration = int(float(splits[4])/frame_shift + 0.5) + start_time = int((float(splits[3])/frame_shift) + 0.5) + duration = int((float(splits[4])/frame_shift) + 0.5) if i < start_time: this_file.extend(["0"]*(start_time - i)) if type1 == "NON-LEX": @@ -295,7 +296,7 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): # Stats class to store some basic stats about the number of # times the post-processor goes through particular loops or blocks # of code in the algorithm. This is just for debugging. -class Stats: +class Stats(object): def __init__(self): self.inter_utt_nonspeech = 0 self.merge_nonspeech_segment = 0 @@ -321,7 +322,7 @@ def reset(self): self.noise_only = 0 # Timer class to time functions -class Timer: +class Timer(object): def __enter__(self): self.start = time.clock() return self @@ -332,7 +333,7 @@ def __exit__(self, *args): # The main class for post-processing a file. # This does the segmentation either looking at the file isolated # or by looking at both classes simultaneously -class JointResegmenter: +class JointResegmenter(object): def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): # Pointers to prediction arrays and Initialization @@ -1290,22 +1291,22 @@ def main(): dest='hard_max_segment_length', default=15.0, \ help="Hard maximum on the segment length above which the segment " \ + "will be broken even if in the middle of speech (default: %(default)s)") - parser.add_argument('--first-separator', type=str, \ + parser.add_argument('--first-separator', \ dest='first_separator', default="-", \ help="Separator between recording-id and start-time (default: %(default)s)") - parser.add_argument('--second-separator', type=str, \ + parser.add_argument('--second-separator', \ dest='second_separator', default="-", \ help="Separator between start-time and end-time (default: %(default)s)") - parser.add_argument('--remove-noise-only-segments', type=str, \ + parser.add_argument('--remove-noise-only-segments', \ dest='remove_noise_only_segments', default="true", choices=("true", "false"), \ help="Remove segments that have only noise. 
(default: %(default)s)") parser.add_argument('--min-inter-utt-silence-length', type=float, \ dest='min_inter_utt_silence_length', default=1.0, \ help="Minimum silence that must exist between two separate utterances (default: %(default)s)"); - parser.add_argument('--channel1-file', type=str, \ + parser.add_argument('--channel1-file', \ dest='channel1_file', default="inLine", \ help="String that matches with the channel 1 file (default: %(default)s)") - parser.add_argument('--channel2-file', type=str, \ + parser.add_argument('--channel2-file', \ dest='channel2_file', default="outLine", \ help="String that matches with the channel 2 file (default: %(default)s)") parser.add_argument('--isolated-resegmentation', \ @@ -1388,7 +1389,7 @@ def main(): speech_cap = None if options.speech_cap_length != None: - speech_cap = int( options.speech_cap_length / options.frame_shift ) + speech_cap = int(options.speech_cap_length/options.frame_shift) # End if for f in pred_files: @@ -1454,7 +1455,7 @@ def main(): f2 = f3 # End if - if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift: + if (len(A1) - len(A2)) > int(options.max_length_diff/options.frame_shift): sys.stderr.write( \ "%s: Warning: Lengths of %s and %s differ by more than %f. " \ % (sys.argv[0], f1,f2, options.max_length_diff) \ diff --git a/egs/babel/s5c/local/lonestar.py b/egs/babel/s5c/local/lonestar.py index e1594e55ada..809f99b22cf 100755 --- a/egs/babel/s5c/local/lonestar.py +++ b/egs/babel/s5c/local/lonestar.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from __future__ import print_function from pylauncher import * import pylauncher import sys @@ -39,7 +40,7 @@ def KaldiLauncher(lo, **kwargs): logfiles = list() commands = list() - for q in xrange(lo.jobstart, lo.jobend+1): + for q in range(lo.jobstart, lo.jobend+1): s = "bash " + lo.queue_scriptfile + " " + str(q) commands.append(s) @@ -74,7 +75,7 @@ def KaldiLauncher(lo, **kwargs): time.sleep(delay); lines=tail(10, logfile) - with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines) + with_status=[x for x in lines if re.search(r'with status (\d+)', x)] if len(with_status) == 0: sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem" @@ -98,7 +99,7 @@ def KaldiLauncher(lo, **kwargs): sys.exit(-1); #Remove service files. 
Be careful not to remove something that might be needed in problem diagnostics - for i in xrange(len(commands)): + for i in range(len(commands)): out_file=os.path.join(qdir, ce.outstring+str(i)) #First, let's wait on files missing (it might be that those are missing @@ -149,7 +150,7 @@ def KaldiLauncher(lo, **kwargs): #print job.final_report() -class LauncherOpts: +class LauncherOpts(object): def __init__(self): self.sync=0 self.nof_threads = 1 @@ -199,7 +200,7 @@ def CmdLineParser(argv): jobend=int(m.group(2)) argv.pop(0) elif re.match("^.+=.*:.*$", argv[0]): - print >> sys.stderr, "warning: suspicious JOB argument " + argv[0]; + print("warning: suspicious JOB argument " + argv[0], file=sys.stderr); if jobstart > jobend: sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n") @@ -238,8 +239,8 @@ def setup_paths_and_vars(opts): cwd = os.getcwd() if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend): - print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \ - "but you are putting the output into just one log file (" + opts.logfile + ")"; + print("lonestar.py: you are trying to run a parallel job" \ + "but you are putting the output into just one log file (" + opts.logfile + ")", file=sys.stderr); sys.exit(1) if not os.path.isabs(opts.logfile): @@ -261,8 +262,8 @@ def setup_paths_and_vars(opts): taskname=os.path.basename(queue_logfile) taskname = taskname.replace(".log", ""); if taskname == "": - print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \ - "that leads to an empty task name ("+logfile + ")"; + print("lonestar.py: you specified the log file name in such form " \ + "that leads to an empty task name ("+logfile + ")", file=sys.stderr); sys.exit(1) if not os.path.isabs(queue_logfile): diff --git a/egs/babel/s5c/local/resegment/segmentation.py b/egs/babel/s5c/local/resegment/segmentation.py index 7c5c8665a16..4bdb0fea75c 100755 --- a/egs/babel/s5c/local/resegment/segmentation.py +++ b/egs/babel/s5c/local/resegment/segmentation.py @@ -3,6 +3,7 @@ # Copyright 2014 Vimal Manohar # Apache 2.0 +from __future__ import division import os, glob, argparse, sys, re, time from argparse import ArgumentParser @@ -19,12 +20,12 @@ def mean(l): if len(l) > 0: - return float(sum(l)) / len(l) + return (float(sum(l))/len(l)) return 0 # Analysis class # Stores statistics like the confusion matrix, length of the segments etc. -class Analysis: +class Analysis(object): def __init__(self, file_id, frame_shift, prefix): self.confusion_matrix = [0] * 9 self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ] @@ -274,7 +275,7 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): i = len(this_file) category = splits[6] word = splits[5] - start_time = int(float(splits[3])/frame_shift + 0.5) + start_time = int((float(splits[3])/frame_shift) + 0.5) duration = int(float(splits[4])/frame_shift + 0.5) if i < start_time: this_file.extend(["0"]*(start_time - i)) @@ -295,7 +296,7 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): # Stats class to store some basic stats about the number of # times the post-processor goes through particular loops or blocks # of code in the algorithm. This is just for debugging. 
-class Stats: +class Stats(object): def __init__(self): self.inter_utt_nonspeech = 0 self.merge_nonspeech_segment = 0 @@ -321,7 +322,7 @@ def reset(self): self.noise_only = 0 # Timer class to time functions -class Timer: +class Timer(object): def __enter__(self): self.start = time.clock() return self @@ -332,7 +333,7 @@ def __exit__(self, *args): # The main class for post-processing a file. # This does the segmentation either looking at the file isolated # or by looking at both classes simultaneously -class JointResegmenter: +class JointResegmenter(object): def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): # Pointers to prediction arrays and Initialization @@ -351,9 +352,9 @@ def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): self.frame_shift = options.frame_shift # Convert length in seconds to frames - self.max_frames = int(options.max_segment_length / options.frame_shift) - self.hard_max_frames = int(options.hard_max_segment_length / options.frame_shift) - self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length / options.frame_shift) + self.max_frames = int(options.max_segment_length/options.frame_shift) + self.hard_max_frames = int(options.hard_max_segment_length/options.frame_shift) + self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length/options.frame_shift) if ( options.remove_noise_only_segments == "false" ): self.remove_noise_segments = False elif ( options.remove_noise_only_segments == "true" ): @@ -540,7 +541,7 @@ def set_nonspeech_proportion(self): # Set the number of non-speech frames to be added depending on the # silence proportion. The target number of frames in the segments # is computed as below: - target_segment_frames = int(num_speech_frames / (1.0 - self.options.silence_proportion)) + target_segment_frames = int(num_speech_frames/(1.0 - self.options.silence_proportion)) # The number of frames currently in the segments num_segment_frames = num_speech_frames @@ -599,7 +600,7 @@ def set_nonspeech_proportion(self): if not changed: # avoid an infinite loop. if no changes, then break.
break if num_segment_frames < target_segment_frames: - proportion = float(num_segment_frames - num_speech_frames) / num_segment_frames + proportion = float(num_segment_frames - num_speech_frames)/num_segment_frames sys.stderr.write("%s: Warning: for recording %s, only got a proportion %f of non-speech frames, versus target %f\n" % (sys.argv[0], self.file_id, proportion, self.options.silence_proportion)) ########################################################################### @@ -863,14 +864,14 @@ def split_long_segments(self): # Count the number of times long segments are split self.stats.split_segments += 1 - num_pieces = int((float(segment_length) / self.hard_max_frames) + 0.99999) + num_pieces = int((float(segment_length)/self.hard_max_frames) + 0.99999) sys.stderr.write("%s: Warning: for recording %s, " \ % (sys.argv[0], self.file_id) \ + "splitting segment of length %f seconds into %d pieces " \ % (segment_length * self.frame_shift, num_pieces) \ + "(--hard-max-segment-length %f)\n" \ % self.options.hard_max_segment_length) - frames_per_piece = int(segment_length / num_pieces) + frames_per_piece = int(segment_length/num_pieces) for i in range(1,num_pieces): q = n + i * frames_per_piece self.S[q] = True @@ -1290,22 +1291,22 @@ def main(): dest='hard_max_segment_length', default=15.0, \ help="Hard maximum on the segment length above which the segment " \ + "will be broken even if in the middle of speech (default: %(default)s)") - parser.add_argument('--first-separator', type=str, \ + parser.add_argument('--first-separator', \ dest='first_separator', default="-", \ help="Separator between recording-id and start-time (default: %(default)s)") - parser.add_argument('--second-separator', type=str, \ + parser.add_argument('--second-separator', \ dest='second_separator', default="-", \ help="Separator between start-time and end-time (default: %(default)s)") - parser.add_argument('--remove-noise-only-segments', type=str, \ + parser.add_argument('--remove-noise-only-segments', \ dest='remove_noise_only_segments', default="true", choices=("true", "false"), \ help="Remove segments that have only noise. (default: %(default)s)") parser.add_argument('--min-inter-utt-silence-length', type=float, \ dest='min_inter_utt_silence_length', default=1.0, \ help="Minimum silence that must exist between two separate utterances (default: %(default)s)"); - parser.add_argument('--channel1-file', type=str, \ + parser.add_argument('--channel1-file', \ dest='channel1_file', default="inLine", \ help="String that matches with the channel 1 file (default: %(default)s)") - parser.add_argument('--channel2-file', type=str, \ + parser.add_argument('--channel2-file', \ dest='channel2_file', default="outLine", \ help="String that matches with the channel 2 file (default: %(default)s)") parser.add_argument('--isolated-resegmentation', \ @@ -1388,7 +1389,7 @@ def main(): speech_cap = None if options.speech_cap_length != None: - speech_cap = int( options.speech_cap_length / options.frame_shift ) + speech_cap = int(options.speech_cap_length/options.frame_shift) # End if for f in pred_files: @@ -1454,7 +1455,7 @@ def main(): f2 = f3 # End if - if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift: + if (len(A1) - len(A2)) > int(options.max_length_diff/options.frame_shift): sys.stderr.write( \ "%s: Warning: Lengths of %s and %s differ by more than %f. 
" \ % (sys.argv[0], f1,f2, options.max_length_diff) \ diff --git a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py index 68280762597..91419f6e920 100755 --- a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py +++ b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py @@ -106,6 +106,7 @@ # Import Statements from __future__ import print_function +from __future__ import division import codecs import argparse import unicodedata @@ -340,7 +341,7 @@ def encode(unicode_transcription, tag_percentage, log=False): int2graph = {v: k for k, v in graph2int.items()} graph_list_int = [graph2int[g] for g in graph_list] bin_edges = range(0, len(int2graph.keys()) + 1) - graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0] / float(len(graph_list_int)) + graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0]/float(len(graph_list_int)) # Set count threshold to frequency that tags the bottom 10% of graphemes bottom_idx = int(np.floor(tag_percentage * len(graph_counts))) count_thresh = sorted(graph_counts)[bottom_idx] @@ -465,7 +466,7 @@ def encode(unicode_transcription, tag_percentage, log=False): for g_dict in table: g_map = "" map_number = 0 - for g_field, g_val in sorted(g_dict.iteritems()): + for g_field, g_val in sorted(g_dict.items()): if(g_field == ("MAP" + str(map_number))): g_map = g_map + g_val + " " map_number = map_number + 1 @@ -561,7 +562,7 @@ def write_table(table, outfile): # Start writing to output with codecs.open(outfile, "w", "utf-8") as fo: # Get header names - header_names = sorted(set().union(*[d.keys() for d in table])) + header_names = sorted(set().union(*[list(d.keys()) for d in table])) # Write headers for h in header_names[:-1]: fo.write("%s\t" % h) @@ -595,7 +596,7 @@ def write_map(grapheme_map, mapfile): ''' with codecs.open(mapfile, 'w', encoding='utf-8') as f: - for g, g_map in grapheme_map.iteritems(): + for g, g_map in grapheme_map.items(): print(g, g_map, file=f) @@ -613,14 +614,14 @@ def write_lexicon(baseforms, encoded_transcription, outfile, sil_lex=None, with codecs.open(outfile, "w", "utf-8") as f: # First write the non-speech words try: - for w in sil_lex.iterkeys(): + for w in sil_lex.keys(): f.write("%s\t%s\n" % (w, sil_lex[w])) except AttributeError: pass # Then write extra-speech words try: - for w in extra_lex.iterkeys(): + for w in extra_lex.keys(): f.write("%s\t%s\n" % (w, extra_lex[w])) except AttributeError: pass @@ -629,9 +630,9 @@ def write_lexicon(baseforms, encoded_transcription, outfile, sil_lex=None, for idx, w in enumerate(baseforms): # This is really just for BABEL in case is written as a word if(w[0].lower() == ""): - f.write("%s\t\n" % (unicode(w[0]))) + f.write("%s\t\n" % (w[0])) else: - f.write("%s\t%s\n" % (unicode(w[0]), + f.write("%s\t%s\n" % (w[0], encoded_transcription[idx])) if __name__ == "__main__": diff --git a/egs/babel/s5d/local/lexicon/make_word_list.py b/egs/babel/s5d/local/lexicon/make_word_list.py index 9a9e17f6c60..c1473b8ced8 100755 --- a/egs/babel/s5d/local/lexicon/make_word_list.py +++ b/egs/babel/s5d/local/lexicon/make_word_list.py @@ -85,7 +85,7 @@ def main(): # Print the word list with codecs.open(args.word_list, "w", encoding="utf-8") as f: for word, count in words: - f.write("%d %s\n" % (count, unicode(word))) + f.write("%d %s\n" % (count, word)) if args.misprons is not None: with codecs.open(args.misprons, "w", encoding="utf-8") as f: diff --git a/egs/babel/s5d/local/prepare_unicode_lexicon.py b/egs/babel/s5d/local/prepare_unicode_lexicon.py index 
86fa4d60ba1..3b9dc1abd86 100755 --- a/egs/babel/s5d/local/prepare_unicode_lexicon.py +++ b/egs/babel/s5d/local/prepare_unicode_lexicon.py @@ -89,7 +89,7 @@ def extract_phonemes(lexicon): # Read all baseform units into dictionary with {a: [a, a_1, a_2], # b: [b_1, b_3], ...} phonemes_dict = {} - for word, pron in lexicon.iteritems(): + for word, pron in lexicon.items(): for p in pron.split(): try: base = p.split("_",1)[0] @@ -98,11 +98,11 @@ def extract_phonemes(lexicon): phonemes_dict[base] = [p] # Makes sure there are no repeats in the list - phonemes_dict = {k: set(v) for k, v in phonemes_dict.iteritems()} + phonemes_dict = {k: set(v) for k, v in phonemes_dict.items()} # Get all unique phonemes phonemes = [] - for v in phonemes_dict.itervalues(): + for v in phonemes_dict.values(): for p in v: phonemes.append(p) @@ -137,11 +137,11 @@ def write_extra_questions(nonsil_phonemes, nonsil_phonemes_dict, # Write all possible phone_tag combinations that occur in the lexicon for tag in tags: - for p in nonsil_phonemes_dict.iterkeys(): + for p in nonsil_phonemes_dict.keys(): tagged_phoneme = "_".join([p, tag]) if(tagged_phoneme in nonsil_phonemes_dict[p]): fp.write("%s " % tagged_phoneme) - for p in sil_phonemes_dict.iterkeys(): + for p in sil_phonemes_dict.keys(): tagged_phoneme = "_".join([p, tag]) if(tagged_phoneme in sil_phonemes_dict[p]): fp.write("%s " % tagged_phoneme) diff --git a/egs/babel/s5d/local/resegment/segmentation.py b/egs/babel/s5d/local/resegment/segmentation.py index 7c5c8665a16..02fd7646b96 100755 --- a/egs/babel/s5d/local/resegment/segmentation.py +++ b/egs/babel/s5d/local/resegment/segmentation.py @@ -3,6 +3,7 @@ # Copyright 2014 Vimal Manohar # Apache 2.0 +from __future__ import division import os, glob, argparse, sys, re, time from argparse import ArgumentParser @@ -19,12 +20,12 @@ def mean(l): if len(l) > 0: - return float(sum(l)) / len(l) + return float(sum(l))/len(l) return 0 # Analysis class # Stores statistics like the confusion matrix, length of the segments etc. -class Analysis: +class Analysis(object): def __init__(self, file_id, frame_shift, prefix): self.confusion_matrix = [0] * 9 self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ] @@ -274,8 +275,8 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): i = len(this_file) category = splits[6] word = splits[5] - start_time = int(float(splits[3])/frame_shift + 0.5) - duration = int(float(splits[4])/frame_shift + 0.5) + start_time = int((float(splits[3])/frame_shift) + 0.5) + duration = int((float(splits[4])/frame_shift) + 0.5) if i < start_time: this_file.extend(["0"]*(start_time - i)) if type1 == "NON-LEX": @@ -295,7 +296,7 @@ def read_rttm_file(rttm_file, temp_dir, frame_shift): # Stats class to store some basic stats about the number of # times the post-processor goes through particular loops or blocks # of code in the algorithm. This is just for debugging. -class Stats: +class Stats(object): def __init__(self): self.inter_utt_nonspeech = 0 self.merge_nonspeech_segment = 0 @@ -321,7 +322,7 @@ def reset(self): self.noise_only = 0 # Timer class to time functions -class Timer: +class Timer(object): def __enter__(self): self.start = time.clock() return self @@ -332,7 +333,7 @@ def __exit__(self, *args): # The main class for post-processing a file. 
# This does the segmentation either looking at the file isolated # or by looking at both classes simultaneously -class JointResegmenter: +class JointResegmenter(object): def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): # Pointers to prediction arrays and Initialization @@ -351,8 +352,8 @@ def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): self.frame_shift = options.frame_shift # Convert length in seconds to frames - self.max_frames = int(options.max_segment_length / options.frame_shift) - self.hard_max_frames = int(options.hard_max_segment_length / options.frame_shift) + self.max_frames = int(options.max_segment_length/options.frame_shift) + self.hard_max_frames = int(options.hard_max_segment_length/options.frame_shift) self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length / options.frame_shift) if ( options.remove_noise_only_segments == "false" ): self.remove_noise_segments = False @@ -540,7 +541,7 @@ def set_nonspeech_proportion(self): # Set the number of non-speech frames to be added depending on the # silence proportion. The target number of frames in the segments # is computed as below: - target_segment_frames = int(num_speech_frames / (1.0 - self.options.silence_proportion)) + target_segment_frames = int(num_speech_frames/(1.0 - self.options.silence_proportion)) # The number of frames currently in the segments num_segment_frames = num_speech_frames @@ -599,7 +600,7 @@ def set_nonspeech_proportion(self): if not changed: # avoid an infinite loop. if no changes, then break. break if num_segment_frames < target_segment_frames: - proportion = float(num_segment_frames - num_speech_frames) / num_segment_frames + proportion = float(num_segment_frames - num_speech_frames)/ num_segment_frames sys.stderr.write("%s: Warning: for recording %s, only got a proportion %f of non-speech frames, versus target %f\n" % (sys.argv[0], self.file_id, proportion, self.options.silence_proportion)) ########################################################################### @@ -863,14 +864,14 @@ def split_long_segments(self): # Count the number of times long segments are split self.stats.split_segments += 1 - num_pieces = int((float(segment_length) / self.hard_max_frames) + 0.99999) + num_pieces = int((float(segment_length)/self.hard_max_frames) + 0.99999) sys.stderr.write("%s: Warning: for recording %s, " \ % (sys.argv[0], self.file_id) \ + "splitting segment of length %f seconds into %d pieces " \ % (segment_length * self.frame_shift, num_pieces) \ + "(--hard-max-segment-length %f)\n" \ % self.options.hard_max_segment_length) - frames_per_piece = int(segment_length / num_pieces) + frames_per_piece = int(segment_length/num_pieces) for i in range(1,num_pieces): q = n + i * frames_per_piece self.S[q] = True @@ -1388,7 +1389,7 @@ def main(): speech_cap = None if options.speech_cap_length != None: - speech_cap = int( options.speech_cap_length / options.frame_shift ) + speech_cap = int(options.speech_cap_length/options.frame_shift) # End if for f in pred_files: @@ -1454,7 +1455,7 @@ def main(): f2 = f3 # End if - if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift: + if (len(A1) - len(A2)) > options.max_length_diff/options.frame_shift: sys.stderr.write( \ "%s: Warning: Lengths of %s and %s differ by more than %f. 
" \ % (sys.argv[0], f1,f2, options.max_length_diff) \ diff --git a/egs/bentham/v1/local/gen_topo.py b/egs/bentham/v1/local/gen_topo.py index 540bfbcf270..af9e20317d8 100755 --- a/egs/bentham/v1/local/gen_topo.py +++ b/egs/bentham/v1/local/gen_topo.py @@ -9,6 +9,7 @@ # the number of states for other characters. from __future__ import print_function +from __future__ import division import argparse import string @@ -19,11 +20,11 @@ parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); -parser.add_argument("nonsilence_phones", type=str, +parser.add_argument("nonsilence_phones", help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); -parser.add_argument("silence_phones", type=str, +parser.add_argument("silence_phones", help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); -parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); +parser.add_argument("phone_list", help="file containing all phones and their corresponding number."); args = parser.parse_args() @@ -47,8 +48,8 @@ print("") for x in range(0, args.num_nonsil_states): xp1 = x + 1 - print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") -print(" " + str(args.num_nonsil_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(x, xp1)) +print(" {} ".format(args.num_nonsil_states)) print("") # For nonsilence phones that ar punctuations @@ -58,8 +59,8 @@ print("") for x in range(0, args.num_punctuation_states): xp1 = x + 1 - print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") -print(" " + str(args.num_punctuation_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(x, xp1)) +print(" {} ".format(args.num_punctuation_states)) print("") # For silence phones @@ -68,25 +69,25 @@ print(" ".join([str(x) for x in silence_phones])) print("") if(args.num_sil_states > 1): - transp = 1.0 / (args.num_sil_states - 1) + transp = 1.0/(args.num_sil_states - 1) state_str = " 0 0 " for x in range(0, (args.num_sil_states - 1)): - state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = "{} {} {} ".format(state_str, x, transp) state_str = state_str + "" print(state_str) for x in range(1, (args.num_sil_states - 1)): - state_str = " " + str(x) + " " + str(x) + " " + state_str = " {0} {0} ".format(x) for y in range(1, args.num_sil_states): - state_str = state_str + " " + str(y) + " " + str(transp) + " " + state_str = "{} {} {} ".format(state_str, y, transp) state_str = state_str + "" print(state_str) second_last = args.num_sil_states - 1 - print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") - print(" " + str(args.num_sil_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(second_last, args.num_sil_states)) + print(" {} ".format(args.num_sil_states)) else: print(" 0 0 0 0.75 1 0.25 ") - print(" " + str(args.num_sil_states) + " ") + print(" {} ".format(args.num_sil_states)) print("") print("") diff --git a/egs/bn_music_speech/v1/local/make_annotations_bn.py b/egs/bn_music_speech/v1/local/make_annotations_bn.py index 53cebf52ea4..86bec7b16ae 100755 --- a/egs/bn_music_speech/v1/local/make_annotations_bn.py +++ 
b/egs/bn_music_speech/v1/local/make_annotations_bn.py @@ -9,6 +9,7 @@ # # This file is meant to be invoked by make_bn.sh. +from __future__ import print_function import sys, re, os def is_speech(line): @@ -37,7 +38,7 @@ def extract_speech(line): m = re.search('(?<=E_time=)\d+.\d+', line) end = float(m.group(0)) if start > end: - print "Skipping annotation where end time is before start time:", line + print("Skipping annotation where end time is before start time: {}".format(line)) return start, end def extract_other_type2(line): @@ -46,7 +47,7 @@ def extract_other_type2(line): m = re.search('(?<=E_time=)\d+.\d+', line) end = float(m.group(0)) if start > end: - print "Skipping annotation where end time is before start time:", line + print("Skipping annotation where end time is before start time: {}".format(line)) return start, end def extract_music(line): @@ -60,7 +61,7 @@ def extract_music(line): elif level == "O": is_on = False else: - print "Encountered bad token on line:", line + print("Encountered bad token on line: {}".format(line)) sys.exit() return time, is_on @@ -75,7 +76,7 @@ def extract_other_type1(line): elif level == "O": is_on = False else: - print "Encountered bad token on line:", line + print("Encountered bad token on line: {}".format(line)) sys.exit() return time, is_on @@ -92,11 +93,11 @@ def process_file(annos): for line in annos: if is_speech(line): speech_start, speech_end = extract_speech(line) - speech = speech + str(speech_start) + " " + str(speech_end) + "\n" + speech = "{}{} {}\n".format(speech, speech_start, speech_end) max_time = max(speech_end, max_time) elif is_other_type2(line): other_type2_start, other_type2_end = extract_other_type2(line) - other_type2 = other_type2 + str(other_type2_start) + " " + str(other_type2_end) + "\n" + other_type2 = "{}{} {}\n".format(other_type2, other_type2_start, other_type2_end) max_time = max(other_type2_end, max_time) elif is_music(line): time, is_on = extract_music(line) @@ -105,7 +106,7 @@ def process_file(annos): prev_music_time = time start_new_music_segment = False elif not is_on and not start_new_music_segment: - music = music + str(prev_music_time) + " " + str(time) + "\n" + music = "{}{} {}\n".format(music, prev_music_time, time) start_new_music_segment = True elif is_other_type1(line): time, is_on = extract_other_type1(line) @@ -114,13 +115,13 @@ def process_file(annos): prev_other_time = time start_new_other_segment = False elif not is_on and not start_new_other_segment: - other_type1 = other_type1 + str(prev_other_time) + " " + str(time) + "\n" + other_type1 = "{}{} {}\n".format(other_type1, prev_other_time, time) start_new_other_segment = True if not start_new_music_segment: - music = music + str(prev_music_time) + " " + str(max_time) + "\n" + music = "{}{} {}\n".format(music, prev_music_time, max_time) if not start_new_other_segment: - other_type1 = other_type1 + str(prev_other_time) + " " + str(max_time) + "\n" + other_type1 = "{}{} {}\n".format(other_type1, prev_other_time, max_time) other = other_type1 + other_type2 return speech, music, other diff --git a/egs/bn_music_speech/v1/local/make_bn.py b/egs/bn_music_speech/v1/local/make_bn.py index 98836d32534..7ec9aabcbdf 100755 --- a/egs/bn_music_speech/v1/local/make_bn.py +++ b/egs/bn_music_speech/v1/local/make_bn.py @@ -20,7 +20,7 @@ for file in files: utt = str(file).replace(".sph", "") if file.endswith(".sph") and utt in utts: - wav = wav + utt + " sox " + subdir + "/" + utt + ".sph" + " -c 1 -r 16000 -t wav - |\n" + wav = "{0}{1} sox {2}/{1}.sph -c 1 -r 16000 
-t wav - |\n".format(wav, utt, subdir) wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') wav_fi.write(wav) @@ -32,14 +32,14 @@ count = 1 for line in music_fi: left, right = line.rstrip().split(" ") - segments = segments + utt + "-music-" + str(count) + " " + utt + " " + left + " " + right + "\n" - utt2spk = utt2spk + utt + "-music-" + str(count) + " " + utt + "-music-" + str(count) + "\n" + segments = "{0}{1}-music-{2} {1} {3} {4}\n".format(segments, utt, count, left, right) + utt2spk = "{0}{1}-music-{2} {1}-music-{2}\n".format(utt2spk, utt, count) count += 1 count = 1 for line in speech_fi: left, right = line.rstrip().split(" ") - segments = segments + utt + "-speech-" + str(count) + " " + utt + " " + left + " " + right + "\n" - utt2spk = utt2spk + utt + "-speech-" + str(count) + " " + utt + "-speech-" + str(count) + "\n" + segments = "{0}{1}-speech-{2} {1} {3} {4}\n".format(segments, utt, count, left, right) + utt2spk = "{0}{1}-speech-{2} {1}-speech-{2}\n".format(utt2spk, utt, count) count += 1 utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') utt2spk_fi.write(utt2spk) diff --git a/egs/bn_music_speech/v1/local/make_musan.py b/egs/bn_music_speech/v1/local/make_musan.py index b3795fe2b7d..942973cfc65 100755 --- a/egs/bn_music_speech/v1/local/make_musan.py +++ b/egs/bn_music_speech/v1/local/make_musan.py @@ -43,9 +43,9 @@ def prepare_music(root_dir, use_vocals): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -69,9 +69,9 @@ def prepare_speech(root_dir): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -95,9 +95,9 @@ def prepare_noise(root_dir): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/bn_music_speech/v1/local/print_scores.py b/egs/bn_music_speech/v1/local/print_scores.py index c2b587cdcad..e563afb63d7 100755 --- a/egs/bn_music_speech/v1/local/print_scores.py +++ b/egs/bn_music_speech/v1/local/print_scores.py @@ -11,6 +11,7 @@ # those strings to determine if it is a target or nontarget # utterance. We arbitrarily pick music to be the target class.
+from __future__ import print_function import sys utt2score = open(sys.argv[1], 'r').readlines() for i in range(0, len(utt2score)): @@ -19,4 +20,4 @@ type = "target" else: type = "nontarget" - print score, type + print(score, type) diff --git a/egs/bn_music_speech/v1/local/refine_annotations_bn.py b/egs/bn_music_speech/v1/local/refine_annotations_bn.py index 52ac87c8640..31cb1803f57 100755 --- a/egs/bn_music_speech/v1/local/refine_annotations_bn.py +++ b/egs/bn_music_speech/v1/local/refine_annotations_bn.py @@ -10,6 +10,7 @@ # designated length are created. # # This file is meant to be invoked from make_bn.sh. +from __future__ import division import sys, os def seg_to_string(seg): @@ -23,7 +24,7 @@ def seg_to_string(seg): def process_segs(raw_segs): segs = [] for seg in raw_segs: - lower, upper = map(float, seg.rstrip().split(" ")) + lower, upper = [float(i) for i in seg.rstrip().split(" ")] segs.append((lower, upper)) return segs @@ -60,8 +61,8 @@ def resegment(music, speech, other, frame_length, min_seg): start_frame = 0 for i in range(1, len(frame2classes)): if curr_class != frame2classes[i]: - start = float(start_frame) / frame_length - end = float(i) / frame_length + start = float(start_frame)/frame_length + end = float(i)/frame_length if end - start > min_seg: if curr_class == "music": new_music.append((start, end)) diff --git a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh index 882b5800908..d7bb389bad5 100755 --- a/egs/callhome_diarization/v1/diarization/extract_ivectors.sh +++ b/egs/callhome_diarization/v1/diarization/extract_ivectors.sh @@ -92,7 +92,7 @@ if [ $stage -le 0 ]; then fi utils/data/get_uniform_subsegments.py \ --max-segment-duration=$window \ - --overlap-duration=$(echo "$window-$period" | bc) \ + --overlap-duration=$(perl -e "print $window-$period") \ --max-remaining-duration=$min_segment \ --constant-duration=True \ $segments > $dir/subsegments diff --git a/egs/callhome_diarization/v1/local/make_musan.py b/egs/callhome_diarization/v1/local/make_musan.py index b3f6652ba40..974e73e0777 100755 --- a/egs/callhome_diarization/v1/local/make_musan.py +++ b/egs/callhome_diarization/v1/local/make_musan.py @@ -43,9 +43,9 @@ def prepare_music(root_dir, use_vocals): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file: {}".format(utt)) num_bad_files += 1 - print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -69,9 +69,9 @@ def prepare_speech(root_dir): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file: {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -95,9 +95,9 @@ def prepare_noise(root_dir): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file: {}".format(utt)) num_bad_files += 1
- print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/callhome_egyptian/s5/local/convert_symtable_to_utf.py b/egs/callhome_egyptian/s5/local/convert_symtable_to_utf.py index f5b69a1ff86..7192ff7a1cc 100644 --- a/egs/callhome_egyptian/s5/local/convert_symtable_to_utf.py +++ b/egs/callhome_egyptian/s5/local/convert_symtable_to_utf.py @@ -1,3 +1,4 @@ #!/usr/bin/env py +from __future__ import print_function # Converts a romanized ECA word list (symbol table) to @@ -7,9 +8,9 @@ import codecs if len(sys.argv) < 3: - print "USAGE: local/convert_symtable_to_utf.py [SYMTABLE] [ECA-LEXICON]" - print "E.g., local/convert_symtable_to_utf.py data/lang/words.txt \ - /export/corpora/LDC/LDC99L22" + print("USAGE: local/convert_symtable_to_utf.py [SYMTABLE] [ECA-LEXICON]") + print("E.g., local/convert_symtable_to_utf.py data/lang/words.txt \ + /export/corpora/LDC/LDC99L22") sys.exit(1) # Note that the ECA lexicon's default encoding is ISO-8859-6, not UTF8 diff --git a/egs/callhome_egyptian/s5/local/splits/get_conversation.py b/egs/callhome_egyptian/s5/local/splits/get_conversation.py index c999d3e597e..80f66174e2b 100755 --- a/egs/callhome_egyptian/s5/local/splits/get_conversation.py +++ b/egs/callhome_egyptian/s5/local/splits/get_conversation.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import print_function import os import re @@ -37,14 +38,14 @@ evaltest[pathComponents[12]] = numberOfConversations testConv = testConv + numberOfConversations -print "==============Train===============" -print train -print "Total Conversations in train = " + str(trainConv) -print "==============Dev===============" -print devtest -print "Total Conversations in dev = " + str(devConv) -print "==============Test===============" -print evaltest -print "Total Conversations in test = " + str(testConv) -print "=================================" -print "Total Conversations in Corpus = " + str(trainConv + devConv + testConv) +print("==============Train===============") +print(train) +print("Total Conversations in train = {}".format(trainConv)) +print("==============Dev===============") +print(devtest) +print("Total Conversations in dev = {}".format(devConv)) +print("==============Test===============") +print(evaltest) +print("Total Conversations in test = {}".format(testConv)) +print("=================================") +print("Total Conversations in Corpus = {}".format(trainConv + devConv + testConv)) diff --git a/egs/chime5/s5/local/json2text.py b/egs/chime5/s5/local/json2text.py index 4df0160efb6..a0142ad916e 100755 --- a/egs/chime5/s5/local/json2text.py +++ b/egs/chime5/s5/local/json2text.py @@ -25,8 +25,8 @@ def hms_to_seconds(hms): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('json', type=str, help='JSON transcription file') - parser.add_argument('--mictype', type=str, + parser.add_argument('json', help='JSON transcription file') + parser.add_argument('--mictype', choices=['ref', 'worn', 'u01', 'u02', 'u03', 'u04', 'u05', 'u06'], help='Type of microphones') args = parser.parse_args() diff --git a/egs/cifar/v1/image/get_allowed_lengths.py b/egs/cifar/v1/image/get_allowed_lengths.py index 44e17028695..33996c8eef1 100755 --- a/egs/cifar/v1/image/get_allowed_lengths.py +++ b/egs/cifar/v1/image/get_allowed_lengths.py @@ -10,6 +10,7 @@ file is later used by
make_features.py to pad each image sufficiently so that they all have an allowed length. This is intended for end2end chain training. """ +from __future__ import division import argparse import os @@ -124,7 +125,7 @@ def find_allowed_durations(start_len, end_len, args): def main(): args = get_args() - args.factor = 1.0 + args.factor / 100.0 + args.factor = 1.0 + args.factor/100.0 image2length = read_kaldi_mapfile(os.path.join(args.srcdir, 'image2num_frames')) @@ -133,7 +134,7 @@ def main(): "Coverage rate: {}%".format(start_dur, end_dur, 100.0 - args.coverage_factor * 2)) logger.info("There will be {} unique allowed lengths " - "for the images.".format(int(math.log(end_dur / start_dur) / + "for the images.".format(int((math.log(float(end_dur)/start_dur))/ math.log(args.factor)))) allowed_durations = find_allowed_durations(start_dur, end_dur, args) diff --git a/egs/cifar/v1/image/matrix_to_image.py b/egs/cifar/v1/image/matrix_to_image.py index 52dcead7479..908b1f8b3ed 100755 --- a/egs/cifar/v1/image/matrix_to_image.py +++ b/egs/cifar/v1/image/matrix_to_image.py @@ -26,6 +26,7 @@ copy-feats --binary=false $(grep $imgid data/train/feats.scp | cut -d' ' -f2) - | \ image/matrix_to_image.py --color=1 > $imgid.bmp """ +from __future__ import division import argparse import sys @@ -59,7 +60,7 @@ num_cols = len(line) # initialize if len(line) != num_cols: raise Exception("All rows should be of the same length") - line = map(float, line) # string to float + line = [float(i) for i in line] # string to float if max(line) > 1: raise Excetion("Element value in the matrix should be normalized and no larger than 1") line = [int(x * 255) for x in line] # float to integer ranging from 0 to 255 @@ -70,7 +71,7 @@ if num_cols % 3 != 0: raise Exception("Number of columns should be a multiple of 3 in the color mode") width = num_rows - height = num_cols / 3 + height = num_cols // 3 # reform the image matrix image_array = [[0 for i in range(width * 3)] for j in range(height)] for i in range(height): diff --git a/egs/cifar/v1/image/select_image_in_egs.py b/egs/cifar/v1/image/select_image_in_egs.py index 88d7d568e66..dbf48e6403d 100755 --- a/egs/cifar/v1/image/select_image_in_egs.py +++ b/egs/cifar/v1/image/select_image_in_egs.py @@ -9,6 +9,7 @@ # --vertical-shift=0.3 --srand=27 --num-channels=3 ark:exp/cifar10_egs/egs.1.ark ark,t:- | \ # image/select_image_in_egs.py $id | image/matrix_to_image.py --color 3 > $id.bmp +from __future__ import print_function import argparse import sys diff --git a/egs/cifar/v1/local/process_data.py b/egs/cifar/v1/local/process_data.py index 51173dafc6f..38a599297d2 100755 --- a/egs/cifar/v1/local/process_data.py +++ b/egs/cifar/v1/local/process_data.py @@ -6,6 +6,7 @@ """ This script prepares the training and test data for CIFAR-10 or CIFAR-100.
""" +from __future__ import division import argparse import os @@ -14,13 +15,13 @@ parser = argparse.ArgumentParser(description="""Converts train/test data of CIFAR-10 or CIFAR-100 to Kaldi feature format""") -parser.add_argument('database', type=str, +parser.add_argument('database', default='data/dl/cifar-10-batches-bin', help='path to downloaded cifar data (binary version)') -parser.add_argument('dir', type=str, help='output dir') -parser.add_argument('--cifar-version', type=str, default='CIFAR-10', choices=['CIFAR-10', 'CIFAR-100']) -parser.add_argument('--dataset', type=str, default='train', choices=['train', 'test']) -parser.add_argument('--out-ark', type=str, default='-', help='where to write output feature data') +parser.add_argument('dir', help='output dir') +parser.add_argument('--cifar-version', default='CIFAR-10', choices=['CIFAR-10', 'CIFAR-100']) +parser.add_argument('--dataset', default='train', choices=['train', 'test']) +parser.add_argument('--out-ark', default='-', help='where to write output feature data') args = parser.parse_args() @@ -37,7 +38,7 @@ def load_cifar10_data_batch(datafile): for i in range(num_images_in_batch): label = ord(fh.read(1)) bin_img = fh.read(C * H * W) - img = [[[ord(byte) / 255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]] + img = [[[ord(byte)/255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]] for row in range(H)] for channel in range(C)] labels += [label] data += [img] @@ -52,7 +53,7 @@ def load_cifar100_data_batch(datafile, num_images_in_batch): coarse_label = ord(fh.read(1)) fine_label = ord(fh.read(1)) bin_img = fh.read(C * H * W) - img = [[[ord(byte) / 255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]] + img = [[[ord(byte)/255.0 for byte in bin_img[channel*H*W+row*W:channel*H*W+(row+1)*W]] for row in range(H)] for channel in range(C)] fine_labels += [fine_label] coarse_labels += [coarse_label] @@ -80,7 +81,7 @@ def write_kaldi_matrix(file_handle, matrix, key): if num_cols != len(matrix[row_index]): raise Exception("All the rows of a matrix are expected to " "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + file_handle.write(" ".join([str(x) for x in matrix[row_index]])) if row_index != num_rows - 1: file_handle.write("\n") file_handle.write(" ]\n") diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_dev.py b/egs/dihard_2018/v1/local/make_dihard_2018_dev.py index 71b2b1b0143..fa652da8b4c 100755 --- a/egs/dihard_2018/v1/local/make_dihard_2018_dev.py +++ b/egs/dihard_2018/v1/local/make_dihard_2018_dev.py @@ -35,7 +35,7 @@ def prepare_dihard_2018_dev(src_dir, data_dir): rttm_fi.write(rttm_str) with open("{}/data/rttm/{}.rttm".format(src_dir, utt), 'r') as fh: rttm_list = fh.readlines() - spk_list = map(lambda x: (x.split())[7], rttm_list) + spk_list = [(x.split())[7] for x in rttm_list] num_spk = len(set(spk_list)) reco2num_spk_fi.write("{} {}\n".format(utt, num_spk)) wavscp_fi.close() diff --git a/egs/dihard_2018/v1/local/make_dihard_2018_eval.py b/egs/dihard_2018/v1/local/make_dihard_2018_eval.py index f8bd434f51a..2a8acbee58d 100755 --- a/egs/dihard_2018/v1/local/make_dihard_2018_eval.py +++ b/egs/dihard_2018/v1/local/make_dihard_2018_eval.py @@ -35,7 +35,7 @@ def prepare_dihard_2018_eval(src_dir, data_dir): rttm_fi.write(rttm_str) with open("{}/data/rttm/{}.rttm".format(src_dir, utt), 'r') as fh: rttm_list = fh.readlines() - spk_list = map(lambda x: (x.split())[7], rttm_list) + spk_list = [(x.split())[7] for x in rttm_list] num_spk = 
len(set(spk_list)) reco2num_spk_fi.write("{} {}\n".format(utt, num_spk)) wavscp_fi.close() diff --git a/egs/dihard_2018/v2/local/make_musan.py b/egs/dihard_2018/v2/local/make_musan.py index 74c434990fb..c4b5c9359b4 100755 --- a/egs/dihard_2018/v2/local/make_musan.py +++ b/egs/dihard_2018/v2/local/make_musan.py @@ -47,9 +47,9 @@ def prepare_music(root_dir, use_vocals): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -73,9 +73,9 @@ utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -99,9 +99,9 @@ utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/fame/v1/local/prepare_for_eer.py b/egs/fame/v1/local/prepare_for_eer.py index 59d2985e7c2..f1dbcfa9ab6 100755 --- a/egs/fame/v1/local/prepare_for_eer.py +++ b/egs/fame/v1/local/prepare_for_eer.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Copyright 2015 David Snyder # Apache 2.0. # @@ -12,4 +13,4 @@ spkrutt2target[spkr+utt]=target for line in scores: spkr, utt, score = line.strip().split() - print score, spkrutt2target[spkr+utt] + print(score, spkrutt2target[spkr+utt]) diff --git a/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf b/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-200 # high cutoff frequently, relative to Nyquist of 4000 (=3800) diff --git a/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf b/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/fisher_callhome_spanish/s5/local/callhome_get_lattices.py b/egs/fisher_callhome_spanish/s5/local/callhome_get_lattices.py index 9112d868c25..4c96e01ce7e 100755 --- a/egs/fisher_callhome_spanish/s5/local/callhome_get_lattices.py +++ b/egs/fisher_callhome_spanish/s5/local/callhome_get_lattices.py @@ -5,6 +5,7 @@ # The list of files in the conversations for which 1 best output has to be extracted # words.txt +from __future__ import print_function import os import sys import subprocess @@ -76,7 +77,7 @@ def findLattice(timeDetail): # Concatenate lattices mergedTranslation = latticeConcatenate(mergedTranslation, tmp) - print mergedTranslation + print(mergedTranslation) if mergedTranslation != "": # Sanjeev's Recipe : Remove epsilons and topo sort @@ -95,16 +96,16 @@ def findLattice(timeDetail): # file so it can be checked later proc = subprocess.Popen("/export/a04/gkumar/moses/mosesdecoder/checkplf < " + finalPLFFile + " 2>&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) line = proc.stdout.readline() - print line + " " + str(lineNo) + print("{} {}".format(line, lineNo)) if line.strip() != "PLF format appears to be correct.": os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) else: provFile.write(PLFline) else: blankPLF.write(timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) # Now convert to PLF lineNo += 1 diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh new file mode 100755 index 00000000000..c487f1bd222 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh @@ -0,0 +1,288 @@ +#!/bin/bash + +# 1g is like 1f but upgrading to a "resnet-style TDNN-F model", i.e. +# with bypass resnet connections, and re-tuned. +# compute-wer --text --mode=present ark:exp/chain/multipsplice_tdnn/decode_fsp_train_test/scoring_kaldi/test_filt.txt ark,p:- +# %WER 22.21 [ 8847 / 39831, 1965 ins, 2127 del, 4755 sub ] +# %SER 56.98 [ 3577 / 6278 ] +# Scored 6278 sentences, 0 not present in hyp. + +# steps/info/chain_dir_info.pl exp/chain/multipsplice_tdnn +# exp/chain/multipsplice_tdnn: num-iters=296 nj=1..2 num-params=8.2M dim=40+100->2489 combine=-0.170->-0.165 (over 8) xent:train/valid[196,295,final]=(-2.30,-1.93,-1.83/-2.24,-1.96,-1.86) logprob:train/valid[196,295,final]=(-0.208,-0.169,-0.164/-0.189,-0.161,-0.158) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train +test_sets="test dev" +gmm=tri5a # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. 
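An aside on the new conf/mfcc_hires.conf above: --high-freq is interpreted as an offset from the Nyquist frequency when it is not positive, which is what the "(=3800)" in its comment refers to. A minimal Python sketch of that arithmetic, purely illustrative and not part of the recipe; the variable names here are invented:

    # Effective mel-filterbank band implied by conf/mfcc_hires.conf above.
    sample_frequency = 8000.0              # --sample-frequency
    nyquist = sample_frequency / 2.0       # 4000 Hz
    low_freq = 40.0                        # --low-freq
    high_freq = -200.0                     # --high-freq; non-positive means "offset from Nyquist"
    effective_high = nyquist + high_freq if high_freq <= 0 else high_freq
    print("mel bins cover {:.0f} Hz to {:.0f} Hz".format(low_freq, effective_high))  # 40 Hz to 3800 Hz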
+ +# Options which are not passed through to run_ivector_common.sh +affix=1g #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 17 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 18 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 19 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 
bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 20 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.srand $srand \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.frames-per-iter 5000000 \ + --trainer.optimization.num-jobs-initial 1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate 0.0005 \ + --trainer.optimization.final-effective-lrate 0.00005 \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.optimization.momentum 0.0 \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context 0 \ + --egs.chunk-right-context 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + --use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir exp/tri5a_lats_nodup_sp \ + --dir $dir || exit 1; +fi + +if [ $stage -le 21 ]; then + # The reason we are using data/lang_test here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + #LM was trained only on Fisher Spanish train subset. 
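As a brief aside before the mkgraph call below: the %WER line quoted in the header comments of this run_tdnn_1g.sh script follows the usual compute-wer convention, errors = insertions + deletions + substitutions over the number of reference words. A small hedged check of that arithmetic (the numbers are copied from the header above; nothing here is part of the recipe):

    # Sanity check of the %WER figure quoted in run_tdnn_1g.sh's header.
    ins, dels, subs = 1965, 2127, 4755     # from "%WER 22.21 [ 8847 / 39831, ... ]"
    ref_words = 39831
    errors = ins + dels + subs             # 8847
    print("WER = {:.2f}%".format(100.0 * errors / ref_words))  # prints "WER = 22.21%"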
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test \ + $tree_dir $tree_dir/graph_fsp_train || exit 1; + +fi + +rnnlmdir=exp/rnnlm_lstm_tdnn_1b +if [ $stage -le 22 ]; then + local/rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; +fi + +if [ $stage -le 23 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l &1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) line = proc.stdout.readline() - print line + " " + str(lineNo) + print("{} {}".format(line, lineNo)) if line.strip() != "PLF format appears to be correct.": os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) else: provFile.write(PLFline) else: blankPLF.write(timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) # Now convert to PLF lineNo += 1 diff --git a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py index 5c09f09bc35..864b76b671b 100755 --- a/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py +++ b/egs/fisher_callhome_spanish/s5/local/merge_lexicons.py @@ -4,6 +4,7 @@ # # Merges unique words from Spanish Fisher, Gigaword and the LDC spanish lexicon +from __future__ import print_function import sys import json import codecs @@ -24,8 +25,7 @@ merged_lexicon.append(line.strip()) fisher.close() -print "After adding the fisher data, the lexicon contains " \ - + str(len(merged_lexicon)) + " entries." +print("After adding the fisher data, the lexicon contains {} entries".format(len(merged_lexicon))) # Now add data from the LDC lexicon ldc = codecs.open(uw_LDC, encoding='iso-8859-1') @@ -34,12 +34,11 @@ if entries[0].lower() not in merged_lexicon: merged_lexicon.append(entries[0].lower()) -print "After adding the LDC data, the lexicon contains " \ - + str(len(merged_lexicon)) + " entries." +print("After adding the LDC data, the lexicon contains {} entries".format(len(merged_lexicon))) # Finally add the gigaword data gigaword = json.load(open(uw_gigaword)) -gigaword = reversed(sorted(gigaword.iteritems(), key=operator.itemgetter(1))) +gigaword = reversed(sorted(gigaword.items(), key=operator.itemgetter(1))) for item in gigaword: # We need a maximum of wordlimit words in the lexicon @@ -49,8 +48,7 @@ if item[0].lower() not in merged_lexicon: merged_lexicon.append(item[0].lower()) -print "After adding the Gigaword data, the lexicon contains " \ - + str(len(merged_lexicon)) + " entries." +print("After adding the Gigaword data, the lexicon contains {} entries".format(len(merged_lexicon))) # Now write the uniquewords to a file lf = codecs.open(tmpdir + '/uniquewords64k', encoding='utf-8', mode='w+') @@ -61,4 +59,4 @@ lf.close() -print "Finshed writing unique words" +print("Finshed writing unique words") diff --git a/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..cc9de4d26c5 --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,187 @@ +#!/bin/bash + +set -e -o pipefail + +# This script is called from scripts like local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It +# contains the common feature preparation and iVector-related parts of the +# script. 
See those scripts for examples of usage. + + +stage=7 +nj=30 +train_set=train # you might set this to e.g. train. +test_sets="test dev" +gmm=tri5a # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff in (e.g. + # in the tedlium recip it's _cleaned). + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 7 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 7." + exit 1 +fi + + +if [ $stage -le 8 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 9 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 10 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + num_utts_total=$(wc -l $text_dir/ami.txt + cat $dev | cut -d ' ' -f2- > $text_dir/dev.txt +fi + +if [ $stage -le 1 ]; then + cp $wordlist $dir/config/ + n=`cat $dir/config/words.txt | wc -l` + echo " $n" >> $dir/config/words.txt + + # words that are not present in words.txt but are in the training or dev data, will be + # mapped to during training. 
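A hedged illustration of the out-of-vocabulary handling described in the comment above: any training or dev word that is missing from words.txt gets replaced by the symbol written to $dir/config/oov.txt on the next line. The actual angle-bracketed symbol is not visible in this rendering, so the sketch below uses a placeholder; none of these names come from the recipe itself.

    # Placeholder OOV symbol; in the recipe the real symbol is whatever
    # gets written into $dir/config/oov.txt.
    OOV_SYMBOL = "<unk>"  # assumption, not taken from this patch

    def load_vocab(words_txt):
        # words.txt lines look like "word integer-id"
        with open(words_txt) as f:
            return {line.split()[0] for line in f if line.strip()}

    def map_oov(line, vocab):
        # replace unknown words so the language model only sees in-vocabulary tokens
        return " ".join(w if w in vocab else OOV_SYMBOL for w in line.split())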
+ echo "" >$dir/config/oov.txt + + cat > $dir/config/data_weights.txt <$dir/config/unigram_probs.txt + + # choose features + rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ + --use-constant-feature=true \ + --top-word-features 10000 \ + --min-frequency 1.0e-03 \ + --special-words=',,,,[noise],[laughter]' \ + $dir/config/words.txt > $dir/config/features.txt + +lstm_opts="l2-regularize=$comp_l2" +tdnn_opts="l2-regularize=$comp_l2" +output_opts="l2-regularize=$output_l2" + + cat >$dir/config/xconfig <&1 | awk 'FNR == 2 {print}'", stdout=subprocess.PIPE, shell=True) line = proc.stdout.readline() - print line + " " + str(lineNo) + print("{} {}".format(line, lineNo)) if line.strip() != "PLF format appears to be correct.": os.system("cp " + finalFST + " " + invalidplfdir + "/" + timeInfo[0]) invalidPLF.write(invalidplfdir + "/" + timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) else: provFile.write(PLFline) else: blankPLF.write(timeInfo[0] + "\n") - rmLines.write(str(lineNo) + "\n") + rmLines.write("{}\n".format(lineNo)) # Now convert to PLF lineNo += 1 diff --git a/egs/fisher_callhome_spanish/s5/path.sh b/egs/fisher_callhome_spanish/s5/path.sh index 1a6fb5f891b..17ffb0369f8 100755 --- a/egs/fisher_callhome_spanish/s5/path.sh +++ b/egs/fisher_callhome_spanish/s5/path.sh @@ -3,3 +3,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dpovey/libs diff --git a/egs/fisher_callhome_spanish/s5/rnnlm b/egs/fisher_callhome_spanish/s5/rnnlm new file mode 120000 index 00000000000..fb754622d5e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/rnnlm @@ -0,0 +1 @@ +../../wsj/s5/rnnlm \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 57902a98fed..6e2752a7b68 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -1,20 +1,22 @@ #!/bin/bash # +# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0 # Copyright 2014 Gaurav Kumar. Apache 2.0 # Recipe for Fisher/Callhome-Spanish -# Made to integrate KALDI with JOSHUA for end-to-end ASR and SMT stage=0 +train_stage=-20 +train_sgmm2=false # call the next line with the directory where the Spanish Fisher data is # (the values below are just an example). -sfisher_speech=/veu4/jadrian/data/LDC/LDC2010S01 -sfisher_transcripts=/veu4/jadrian/data/LDC/LDC2010T04 -spanish_lexicon=/veu4/jadrian/data/LDC/LDC96L16 +sfisher_speech=/export/corpora/LDC/LDC2010S01 +sfisher_transcripts=/export/corpora/LDC/LDC2010T04 +spanish_lexicon=/export/corpora/LDC/LDC96L16 split=local/splits/split_fisher -callhome_speech=/veu4/jadrian/data/LDC/LDC96S35 -callhome_transcripts=/veu4/jadrian/data/LDC/LDC96T17 +callhome_speech=/export/corpora/LDC/LDC96S35 +callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome mfccdir=`pwd`/mfcc @@ -25,7 +27,7 @@ if [ -f path.sh ]; then . 
./path.sh; fi set -e -if [ $stage -lt 1 ]; then +if [ $stage -le 1 ]; then local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts local/callhome_data_prep.sh $callhome_speech $callhome_transcripts @@ -95,7 +97,7 @@ if [ $stage -lt 1 ]; then local/callhome_create_splits.sh $split_callhome fi -if [ $stage -lt 2 ]; then +if [ $stage -le 2 ]; then # Now compute CMVN stats for the train, dev and test subsets steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir @@ -124,90 +126,95 @@ if [ $stage -lt 2 ]; then utils/subset_data_dir.sh --speakers data/train 90000 data/train_100k fi +if [ $stage -le 3 ]; then + steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ + data/train_10k_nodup data/lang exp/mono0a -steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ - data/train_10k_nodup data/lang exp/mono0a + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; - -steps/train_deltas.sh --cmd "$train_cmd" \ + steps/train_deltas.sh --cmd "$train_cmd" \ 2500 20000 data/train_30k data/lang exp/mono0a_ali exp/tri1 || exit 1; -(utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri1/graph data/dev exp/tri1/decode_dev)& + (utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri1/graph data/dev exp/tri1/decode_dev)& -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; -steps/train_deltas.sh --cmd "$train_cmd" \ + steps/train_deltas.sh --cmd "$train_cmd" \ 2500 20000 data/train_30k data/lang exp/tri1_ali exp/tri2 || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; -)& - + ( + utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; + )& +fi -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri2 exp/tri2_ali || exit 1; +if [ $stage -le 4 ]; then + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang exp/tri2 exp/tri2_ali || exit 1; # Train tri3a, which is LDA+MLLT, on 100k data. -steps/train_lda_mllt.sh --cmd "$train_cmd" \ + steps/train_lda_mllt.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ 3000 40000 data/train_100k data/lang exp/tri2_ali exp/tri3a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; -)& - + ( + utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; + )& +fi +if [ $stage -le 5 ]; then # Next we'll use fMLLR and train with SAT (i.e. 
on # fMLLR features) -steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; -steps/train_sat.sh --cmd "$train_cmd" \ - 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; + steps/train_sat.sh --cmd "$train_cmd" \ + 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri4a/graph data/dev exp/tri4a/decode_dev + ( + utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri4a/graph data/dev exp/tri4a/decode_dev )& -steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; # Reduce the number of gaussians -steps/train_sat.sh --cmd "$train_cmd" \ - 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/dev exp/tri5a/decode_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/test exp/tri5a/decode_test + ( + utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/dev exp/tri5a/decode_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/test exp/tri5a/decode_test # Decode CALLHOME - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_dev exp/tri5a/decode_callhome_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train -) & - + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_dev exp/tri5a/decode_callhome_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train + ) & + + + steps/align_fmllr.sh \ + --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ + data/train data/lang exp/tri5a exp/tri5a_ali +fi -steps/align_fmllr.sh \ - --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ - data/train data/lang exp/tri5a exp/tri5a_ali +if $train_sgmm2; then steps/train_ubm.sh \ --cmd "$train_cmd" 750 \ @@ -258,22 +265,7 @@ for iter in 1 2 3 4; do done ) & -dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "--num-threads 16") -dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts 
"--gpu 1") - -steps/nnet2/train_pnorm_ensemble.sh \ - --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ - --num-hidden-layers 4 --pnorm-input-dim 2000 --pnorm-output-dim 200\ - --cmd "$train_cmd" \ - "${dnn_gpu_parallel_opts[@]}" \ - --ensemble-size 4 --initial-beta 0.1 --final-beta 5 \ - data/train data/lang exp/tri5a_ali exp/tri6a_dnn +fi -( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 \ - --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev -) & -wait +local/chain/run_tdnn_1g.sh --stage $stage --train-stage $train_stage || exit 1; exit 0; diff --git a/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py b/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py index 3c447c5976a..75cc4458d85 100755 --- a/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py +++ b/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py @@ -10,6 +10,7 @@ # en_4156 B 414.58 0.16 l # en_4156 B 414.74 0.17 a +from __future__ import division import argparse,re __author__ = 'Minhua Wu' @@ -27,7 +28,7 @@ if items[4].find(".") != -1: letters = items[4].split("._") acronym_period = round(float(items[3]), 2) - letter_slot = round(acronym_period / len(letters), 2) + letter_slot = round(acronym_period/len(letters), 2) time_start = round(float(items[2]), 2) for l in letters[:-1]: time = " %.2f %.2f " % (time_start, letter_slot) diff --git a/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py b/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py index 59814beb4ea..c3f9af09c99 100755 --- a/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py +++ b/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py @@ -10,6 +10,7 @@ # en_4156 B 414.58 0.16 l # en_4156 B 414.74 0.17 a +from __future__ import division import argparse,re __author__ = 'Minhua Wu' @@ -27,7 +28,7 @@ if items[4].find(".") != -1: letters = items[4].split("._") acronym_period = round(float(items[3]), 2) - letter_slot = round(acronym_period / len(letters), 2) + letter_slot = round(acronym_period/ len(letters), 2) time_start = round(float(items[2]), 2) for l in letters[:-1]: time = " %.2f %.2f " % (time_start, letter_slot) diff --git a/egs/gale_mandarin/s5/local/gale_segment.py b/egs/gale_mandarin/s5/local/gale_segment.py index 975ddb9c143..d652eb837f3 100755 --- a/egs/gale_mandarin/s5/local/gale_segment.py +++ b/egs/gale_mandarin/s5/local/gale_segment.py @@ -1,6 +1,7 @@ #!/usr/bin/env python #coding:utf-8 #!/usr/bin/env python +from __future__ import print_function import sys from mmseg import seg_txt for line in sys.stdin: @@ -12,4 +13,4 @@ continue for j in seg_txt(blks[i]): out_line += " " + j - print out_line + print(out_line) diff --git a/egs/hub4_english/s5/local/data_prep/process_1995_bn_annotation.py b/egs/hub4_english/s5/local/data_prep/process_1995_bn_annotation.py index be0c7ad8e0d..5675dc3fbd9 100755 --- a/egs/hub4_english/s5/local/data_prep/process_1995_bn_annotation.py +++ b/egs/hub4_english/s5/local/data_prep/process_1995_bn_annotation.py @@ -31,9 +31,9 @@ def get_args(): parser = argparse.ArgumentParser("Process 1995 CSR-IV HUB4 transcripts") - parser.add_argument("--noise-word", type=str, default="", + parser.add_argument("--noise-word", default="", help="Word to add in-place of noise words") - parser.add_argument("--spoken-noise-word", type=str, + parser.add_argument("--spoken-noise-word", default="", help="Word to add in-place of speaker noise words") parser.add_argument("in_file", type=argparse.FileType('r'), 
@@ -230,7 +230,7 @@ def run(args): start_time = story_end_time segments = process_story_content( args, reco_id, - ' '.join([unicode(x) for x in s.children]), + ' '.join([str(x) for x in s.children]), start_time=story_begin_time, end_time=story_end_time) write_segments(segments, args) elif (s.name is not None and s.name != "language" @@ -240,9 +240,9 @@ def run(args): "or or ; got {0}".format(s)) elif s.name == "language" or s.name == "sung": non_story_contents.append( - ' '.join([unicode(x) for x in s.children])) + ' '.join([str(x) for x in s.children])) else: - non_story_contents.append(unicode(s)) + non_story_contents.append(str(s)) except RuntimeError: raise except Exception: diff --git a/egs/hub4_english/s5/local/data_prep/process_1996_csr_hub4_lm_filelist.py b/egs/hub4_english/s5/local/data_prep/process_1996_csr_hub4_lm_filelist.py index 95aa7ddb831..fb5ba7a64ee 100755 --- a/egs/hub4_english/s5/local/data_prep/process_1996_csr_hub4_lm_filelist.py +++ b/egs/hub4_english/s5/local/data_prep/process_1996_csr_hub4_lm_filelist.py @@ -36,9 +36,9 @@ def get_args(): corpus (LDC98T31).""") parser.add_argument("--verbose", choices=[0,1,2,3], type=int, default=0, help="Set higher for more verbose logging.") - parser.add_argument("file_list", type=str, + parser.add_argument("file_list", help="""List of compressed source files""") - parser.add_argument("dir", type=str, + parser.add_argument("dir", help="Output directory to dump processed files to") args = parser.parse_args() @@ -83,7 +83,7 @@ def process_file_lines(lines, out_file_handle): for x in para.contents: try: if x.name is None: - normalized_text = normalize_text(unicode(x)) + normalized_text = normalize_text(str(x)) if len(normalized_text) == 0: continue out_file_handle.write("{0}\n".format( diff --git a/egs/hub4_english/s5/local/data_prep/process_na_news_text.py b/egs/hub4_english/s5/local/data_prep/process_na_news_text.py index 94b02a766a9..08203f7ada1 100755 --- a/egs/hub4_english/s5/local/data_prep/process_na_news_text.py +++ b/egs/hub4_english/s5/local/data_prep/process_na_news_text.py @@ -38,10 +38,10 @@ def get_args(): parser = argparse.ArgumentParser("Prepare NA News Text corpus (LDC95T21).") parser.add_argument("--verbose", type=int, choices=[0, 1, 2, 3], default=0, help="Use larger verbosity for more verbose logging.") - parser.add_argument("file_list", type=str, + parser.add_argument("file_list", help="List of compressed source files for NA News Text. " "e.g: /export/corpora/LDC/LDC95T21/na_news_1/latwp/1994") - parser.add_argument("out_file", type=str, + parser.add_argument("out_file", help="Output file to write to.") args = parser.parse_args() @@ -85,7 +85,7 @@ def process_file_lines(lines, out_file_handle): continue for para in art.find_all('p'): assert para.name == 'p' - text = ' '.join([unicode(x).strip() for x in para.contents]) + text = ' '.join([str(x).strip() for x in para.contents]) normalized_text = normalize_text(text) out_file_handle.write("{0}\n".format( normalized_text.encode('ascii'))) diff --git a/egs/hub4_english/s5/local/lm/merge_word_counts.py b/egs/hub4_english/s5/local/lm/merge_word_counts.py index 6338cbbf875..85e15d8dc07 100755 --- a/egs/hub4_english/s5/local/lm/merge_word_counts.py +++ b/egs/hub4_english/s5/local/lm/merge_word_counts.py @@ -7,6 +7,7 @@ A min-count argument is required to only write counts that are above the specified minimum count. 
""" +from __future__ import print_function import sys @@ -21,7 +22,7 @@ def main(): parts = line.strip().split() words[parts[1]] = words.get(parts[1], 0) + int(parts[0]) - for word, count in words.iteritems(): + for word, count in words.items(): if count >= int(sys.argv[1]): print ("{0} {1}".format(count, word)) diff --git a/egs/hub4_spanish/s5/local/lexicon/make_unicode_lexicon.py b/egs/hub4_spanish/s5/local/lexicon/make_unicode_lexicon.py index 25f26f38a4f..69b4e374b6e 100755 --- a/egs/hub4_spanish/s5/local/lexicon/make_unicode_lexicon.py +++ b/egs/hub4_spanish/s5/local/lexicon/make_unicode_lexicon.py @@ -106,6 +106,7 @@ # Import Statements from __future__ import print_function +from __future__ import division import codecs import argparse import unicodedata @@ -338,8 +339,8 @@ def encode(unicode_transcription, tag_percentage, log=False): graph2int = {v: k for k, v in enumerate(set(graph_list))} int2graph = {v: k for k, v in graph2int.items()} graph_list_int = [graph2int[g] for g in graph_list] - bin_edges = range(0, len(int2graph.keys()) + 1) - graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0] / float(len(graph_list_int)) + bin_edges = list(range(0, len(int2graph.keys()) + 1)) + graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0]/ float(len(graph_list_int)) # Set count threshold to frequency that tags the bottom 10% of graphemes bottom_idx = int(np.floor(tag_percentage * len(graph_counts))) count_thresh = sorted(graph_counts)[bottom_idx] @@ -464,7 +465,7 @@ def encode(unicode_transcription, tag_percentage, log=False): for g_dict in table: g_map = "" map_number = 0 - for g_field, g_val in sorted(g_dict.iteritems()): + for g_field, g_val in sorted(g_dict.items()): if(g_field == ("MAP" + str(map_number))): g_map = g_map + g_val + " " map_number = map_number + 1 @@ -594,7 +595,7 @@ def write_map(grapheme_map, mapfile): ''' with codecs.open(mapfile, 'w', encoding='utf-8') as f: - for g, g_map in grapheme_map.iteritems(): + for g, g_map in grapheme_map.items(): print(g, g_map, file=f) @@ -612,14 +613,14 @@ def write_lexicon(baseforms, encoded_transcription, outfile, sil_lex=None, with codecs.open(outfile, "w", "utf-8") as f: # First write the non-speech words try: - for w in sil_lex.iterkeys(): + for w in sil_lex.keys(): f.write("%s\t%s\n" % (w, sil_lex[w])) except AttributeError: pass # Then write extra-speech words try: - for w in extra_lex.iterkeys(): + for w in extra_lex.keys(): f.write("%s\t%s\n" % (w, extra_lex[w])) except AttributeError: pass @@ -628,9 +629,9 @@ def write_lexicon(baseforms, encoded_transcription, outfile, sil_lex=None, for idx, w in enumerate(baseforms): # This is really just for BABEL in case is written as a word if(w[0].lower() == ""): - f.write("%s\t\n" % (unicode(w[0]))) + f.write("%s\t\n" % (str(w[0]))) else: - f.write("%s\t%s\n" % (unicode(w[0]), + f.write("%s\t%s\n" % (str(w[0]), encoded_transcription[idx])) if __name__ == "__main__": diff --git a/egs/hub4_spanish/s5/local/prepare_unicode_dict.py b/egs/hub4_spanish/s5/local/prepare_unicode_dict.py index 86fa4d60ba1..3b9dc1abd86 100755 --- a/egs/hub4_spanish/s5/local/prepare_unicode_dict.py +++ b/egs/hub4_spanish/s5/local/prepare_unicode_dict.py @@ -89,7 +89,7 @@ def extract_phonemes(lexicon): # Read all baseform units into dictionary with {a: [a, a_1, a_2], # b: [b_1, b_3], ...} phonemes_dict = {} - for word, pron in lexicon.iteritems(): + for word, pron in lexicon.items(): for p in pron.split(): try: base = p.split("_",1)[0] @@ -98,11 +98,11 @@ def extract_phonemes(lexicon): 
phonemes_dict[base] = [p] # Makes sure there are no repeats in the list - phonemes_dict = {k: set(v) for k, v in phonemes_dict.iteritems()} + phonemes_dict = {k: set(v) for k, v in phonemes_dict.items()} # Get all unique phonemes phonemes = [] - for v in phonemes_dict.itervalues(): + for v in phonemes_dict.values(): for p in v: phonemes.append(p) @@ -137,11 +137,11 @@ def write_extra_questions(nonsil_phonemes, nonsil_phonemes_dict, # Write all possible phone_tag combinations that occur in the lexicon for tag in tags: - for p in nonsil_phonemes_dict.iterkeys(): + for p in nonsil_phonemes_dict.keys(): tagged_phoneme = "_".join([p, tag]) if(tagged_phoneme in nonsil_phonemes_dict[p]): fp.write("%s " % tagged_phoneme) - for p in sil_phonemes_dict.iterkeys(): + for p in sil_phonemes_dict.keys(): tagged_phoneme = "_".join([p, tag]) if(tagged_phoneme in sil_phonemes_dict[p]): fp.write("%s " % tagged_phoneme) diff --git a/egs/iam/v2/local/gen_topo.py b/egs/iam/v2/local/gen_topo.py index 540bfbcf270..8ffc59c5788 100755 --- a/egs/iam/v2/local/gen_topo.py +++ b/egs/iam/v2/local/gen_topo.py @@ -9,6 +9,7 @@ # the number of states for other characters. from __future__ import print_function +from __future__ import division import argparse import string @@ -19,11 +20,11 @@ parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); -parser.add_argument("nonsilence_phones", type=str, +parser.add_argument("nonsilence_phones", help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); -parser.add_argument("silence_phones", type=str, +parser.add_argument("silence_phones", help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); -parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); +parser.add_argument("phone_list", help="file containing all phones and their corresponding number."); args = parser.parse_args() @@ -47,8 +48,8 @@ print("") for x in range(0, args.num_nonsil_states): xp1 = x + 1 - print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") -print(" " + str(args.num_nonsil_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(x, xp1)) +print(" {} ".format(args.num_nonsil_states)) print("") # For nonsilence phones that ar punctuations @@ -58,8 +59,8 @@ print("") for x in range(0, args.num_punctuation_states): xp1 = x + 1 - print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") -print(" " + str(args.num_punctuation_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(x, xp1)) +print(" {} ".format(args.num_punctuation_states)) print("") # For silence phones @@ -72,21 +73,21 @@ state_str = " 0 0 " for x in range(0, (args.num_sil_states - 1)): - state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = "{} {} {} ".format(state_str, x, transp)) state_str = state_str + "" print(state_str) for x in range(1, (args.num_sil_states - 1)): - state_str = " " + str(x) + " " + str(x) + " " + state_str = " {0} " + str(y) + " " + str(transp) + " " + state_str = "{} {} {} ".format(state_str, y, transp)) state_str = state_str + "" print(state_str) second_last = args.num_sil_states - 1 - print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") - print(" " + str(args.num_sil_states) + " ") + print(" {0} {0} {0} 0.75 {1} 0.25 ".format(second_last, args.num_sil_states)) + print(" {} ".format(args.num_sil_states)) else: print(" 0 0 0 0.75 1 0.25 ") - print(" " + str(args.num_sil_states) + " ") + print(" {} ".format(args.num_sil_states)) print("") print("") diff --git a/egs/ifnenit/v1/README.txt b/egs/ifnenit/README.txt similarity index 100% rename from egs/ifnenit/v1/README.txt rename to egs/ifnenit/README.txt diff --git a/egs/ifnenit/v1/local/make_features.py b/egs/ifnenit/v1/local/make_features.py index 3a485e32eb1..87afa37c00a 100755 --- a/egs/ifnenit/v1/local/make_features.py +++ b/egs/ifnenit/v1/local/make_features.py @@ -10,7 +10,7 @@ eg. 
local/make_features.py data/train --feat-dim 40 """ - +from __future__ import division import argparse import os @@ -24,8 +24,8 @@ signal(SIGPIPE,SIG_DFL) parser = argparse.ArgumentParser(description="""Generates and saves the feature vectors""") -parser.add_argument('dir', type=str, help='directory of images.scp and is also output directory') -parser.add_argument('--out-ark', type=str, default='-', help='where to write the output feature file') +parser.add_argument('dir', help='directory of images.scp and is also output directory') +parser.add_argument('--out-ark', default='-', help='where to write the output feature file') parser.add_argument('--feat-dim', type=int, default=40, help='size to scale the height of all images') parser.add_argument('--padding', type=int, default=5, help='size to scale the height of all images') args = parser.parse_args() @@ -42,7 +42,7 @@ def write_kaldi_matrix(file_handle, matrix, key): if num_cols != len(matrix[row_index]): raise Exception("All the rows of a matrix are expected to " "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + file_handle.write(" ".join([str(x) for x in matrix[row_index]])) if row_index != num_rows - 1: file_handle.write("\n") file_handle.write(" ]\n") @@ -51,7 +51,7 @@ def get_scaled_image(im): scale_size = args.feat_dim sx = im.shape[1] sy = im.shape[0] - scale = (1.0 * scale_size) / sy + scale = (1.0 * scale_size)/ sy nx = int(scale_size) ny = int(scale * sx) im = misc.imresize(im, (nx, ny)) diff --git a/egs/librispeech/s5/local/chain/run_tdnn_lstm.sh b/egs/librispeech/s5/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..a4fa11e0908 --- /dev/null +++ b/egs/librispeech/s5/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1b.sh \ No newline at end of file diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..812bf5e7fc5 --- /dev/null +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,222 @@ +#!/bin/bash +# this is the tdnn-lstmp based on the run_tdnn_lstm_1n.sh under Switchboard. 
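Stepping back to the get_scaled_image change in egs/ifnenit/v1/local/make_features.py above: it rescales every image to a fixed height of --feat-dim pixels while keeping the aspect ratio, so the width is scaled by the same factor as the height. A small hedged numerical example (the input size is invented for illustration; only the formula comes from the script):

    # Resizing arithmetic from get_scaled_image, with an invented input.
    feat_dim = 40                               # --feat-dim
    height, width = 120, 300                    # hypothetical input image (rows, cols)
    scale = (1.0 * feat_dim) / height           # 1/3
    new_shape = (feat_dim, int(scale * width))  # (40, 100): height fixed, width scaled
    print(new_shape)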
+ +# training acoustic model and decoding: +# local/chain/tuning/run_tdnn_lstm_1a.sh +# System tdnn_lstm1a_sp +# WER on dev(fglarge) 3.44 +# WER on dev(tglarge) 3.55 +# WER on dev_other(fglarge) 8.63 +# WER on dev_other(tglarge) 9.09 +# WER on test(fglarge) 3.78 +# WER on test(tglarge) 3.94 +# WER on test_other(fglarge) 8.83 +# WER on test_other(tglarge) 9.09 +# Final train prob -0.0452 +# Final valid prob -0.0477 +# Final train prob (xent) -0.7874 +# Final valid prob (xent) -0.8150 +# Num-parameters 27790288 +# exp/chain_cleaned/tdnn_lstm1a_sp/: num-iters=1303 nj=3..16 num-params=27.8M dim=40+100->6056 combine=-0.041->-0.040 (over 9) xent:train/valid[867,1302,final]=(-1.15,-0.782,-0.787/-1.18,-0.810,-0.815) logprob:train/valid[867,1302,final]=(-0.063,-0.047,-0.045/-0.062,-0.049,-0.048) + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +affix=1a +decode_iter= +decode_nj=50 + +# LSTM training options +frames_per_chunk=140,100,160 +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +chunk_left_context=40 +chunk_right_context=0 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 +# decode options +extra_left_context=50 +extra_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +remove_egs=false +common_egs_dir= +nnet3_affix=_cleaned +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=256 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=256 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=lstm1l dim=256 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=128 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + linear-component name=tdnn8l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=lstm2l dim=256 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm2 cell-dim=1280 recurrent-projection-dim=256 non-recurrent-projection-dim=128 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=256 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=lstm3l dim=256 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm3 cell-dim=1280 recurrent-projection-dim=256 non-recurrent-projection-dim=128 
delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 include-log-softmax=false $output_opts + + output-layer name=output-xent input=lstm3 learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/c0{1,2,5,7}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + + +graph_dir=$dir/graph_tgsmall +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + # remove from the graph, and convert back to const-FST. + fstrmsymbols --apply-to-output=true --remove-arcs=true "echo 3|" $graph_dir/HCLG.fst - | \ + fstconvert --fst_type=const > $graph_dir/temp.fst + mv $graph_dir/temp.fst $graph_dir/HCLG.fst +fi + + +iter_opts= +if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in test_clean test_other dev_clean dev_other; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 + steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tglarge} || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,fglarge} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..d9f20fae011 --- /dev/null +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,257 @@ +#!/bin/bash +# this is the tdnn-lstmp based on the run_tdnn_lstm_1a.sh under Librispeech but with larger model size. 
+ +# training acoustic model and decoding: +# local/chain/tuning/run_tdnn_lstm_1b.sh +# local/chain/compare_wer.sh exp/chain_cleaned/tdnn_lstm1a_sp exp/chain_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(fglarge) 3.44 3.36 +# WER on dev(tglarge) 3.55 3.48 +# WER on dev(tgmed) 4.41 4.26 +# WER on dev(tgsmall) 4.82 4.71 +# WER on dev_other(fglarge) 8.63 8.43 +# WER on dev_other(tglarge) 9.09 8.94 +# WER on dev_other(tgmed) 10.99 10.65 +# WER on dev_other(tgsmall) 11.95 11.51 +# WER on test(fglarge) 3.78 3.83 +# WER on test(tglarge) 3.94 3.93 +# WER on test(tgmed) 4.68 4.72 +# WER on test(tgsmall) 5.11 5.10 +# WER on test_other(fglarge) 8.83 8.69 +# WER on test_other(tglarge) 9.09 9.10 +# WER on test_other(tgmed) 11.05 10.86 +# WER on test_other(tgsmall) 12.18 11.83 +# Final train prob -0.0452 -0.0417 +# Final valid prob -0.0477 -0.0459 +# Final train prob (xent) -0.7874 -0.7488 +# Final valid prob (xent) -0.8150 -0.7757 +# Num-parameters 27790288 45245520 + +# rnn-lm rescoring: +# local/rnnlm/tuning/run_tdnn_lstm_1a.sh --ac-model-dir exp/chain_cleaned/tdnn_lstm1b_sp/ +# System tdnn_lstm1b_sp +# WER on dev(fglarge_nbe_rnnlm) 2.73 +# WER on dev(fglarge_lat_rnnlm) 2.83 +# WER on dev(fglarge) 3.36 +# WER on dev(tglarge) 3.48 +# WER on dev_other(fglarge_nbe_rnnlm) 7.20 +# WER on dev_other(fglarge_lat_rnnlm) 7.23 +# WER on dev_other(fglarge) 8.43 +# WER on dev_other(tglarge) 8.94 +# WER on test(fglarge_nbe_rnnlm) 3.10 +# WER on test(fglarge_lat_rnnlm) 3.22 +# WER on test(fglarge) 3.83 +# WER on test(tglarge) 3.93 +# WER on test_other(fglarge_nbe_rnnlm) 7.54 +# WER on test_other(fglarge_lat_rnnlm) 7.65 +# WER on test_other(fglarge) 8.69 +# WER on test_other(tglarge) 9.10 +# Final train prob -0.0417 +# Final valid prob -0.0459 +# Final train prob (xent) -0.7488 +# Final valid prob (xent) -0.7757 +# Num-parameters 45245520 + + + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +affix=1b +decode_iter= +decode_nj=50 + +# LSTM training options +frames_per_chunk=140,100,160 +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +chunk_left_context=40 +chunk_right_context=0 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 +# decode options +extra_left_context=50 +extra_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +remove_egs=false +common_egs_dir= +nnet3_affix=_cleaned +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $opts dim=1280 + linear-component name=tdnn2l dim=320 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn2 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn3l dim=320 $linear_opts + relu-batchnorm-layer name=tdnn3 $opts dim=1280 + linear-component name=tdnn4l dim=320 $linear_opts input=Append(-1,0) + relu-batchnorm-layer name=tdnn4 $opts input=Append(0,1) dim=1280 + linear-component name=tdnn5l dim=320 $linear_opts + relu-batchnorm-layer name=tdnn5 $opts dim=1280 input=Append(tdnn5l, tdnn3l) + linear-component name=tdnn6l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn6 $opts input=Append(0,3) dim=1280 + linear-component name=lstm1l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm1 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn7 $opts input=Append(0,3,tdnn6l,tdnn4l,tdnn2l) dim=1280 + linear-component name=tdnn8l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn8 $opts input=Append(0,3) dim=1280 + linear-component name=lstm2l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm2 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384 delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn9 $opts input=Append(0,3,tdnn8l,tdnn6l,tdnn4l) dim=1280 + linear-component name=tdnn10l dim=320 $linear_opts input=Append(-3,0) + relu-batchnorm-layer name=tdnn10 $opts input=Append(0,3) dim=1280 + linear-component name=lstm3l dim=320 $linear_opts input=Append(-3,0) + fast-lstmp-layer name=lstm3 cell-dim=1536 recurrent-projection-dim=384 non-recurrent-projection-dim=384: delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 include-log-softmax=false $output_opts + + output-layer name=output-xent input=lstm3 learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/c0{1,2,5,7}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir || exit 1; +fi + + +graph_dir=$dir/graph_tgsmall +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + # remove from the graph, and convert back to const-FST. + fstrmsymbols --apply-to-output=true --remove-arcs=true "echo 3|" $graph_dir/HCLG.fst - | \ + fstconvert --fst_type=const > $graph_dir/temp.fst + mv $graph_dir/temp.fst $graph_dir/HCLG.fst +fi + + +iter_opts= +if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in test_clean test_other dev_clean dev_other; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 + steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tglarge} || exit 1 + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ + data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,fglarge} || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi diff --git a/egs/librispeech/s5/local/lm/python/text_post_process.py b/egs/librispeech/s5/local/lm/python/text_post_process.py index 4ffbbe04b1f..344c1b291bd 100755 --- a/egs/librispeech/s5/local/lm/python/text_post_process.py +++ b/egs/librispeech/s5/local/lm/python/text_post_process.py @@ -21,10 +21,10 @@ def parse_args(): parser.add_argument('--abort-long-sent', type=bool, default=False, help='If True and a sentence longer than "max-sent-len" detected' +\ 'exit with error code 1. If False, just split the long sentences.') - parser.add_argument('--sent-end-marker', type=str, default="DOTDOTDOT") - parser.add_argument("in_text", type=str, help="Input text") - parser.add_argument("out_text", type=str, help="Output text") - parser.add_argument("sent_bounds", type=str, + parser.add_argument('--sent-end-marker', default="DOTDOTDOT") + parser.add_argument("in_text", help="Input text") + parser.add_argument("out_text", help="Output text") + parser.add_argument("sent_bounds", help="A file that will contain a comma separated list of numbers, s.t. 
if" + "i is in this list, then there is a sententence break after token i") return parser.parse_args() @@ -66,7 +66,7 @@ def parse_args(): n_tokens += 1 start_scan = 4 current_line.append('SUN') - for i in xrange(start_scan, len(opl_tokens)): + for i in range(start_scan, len(opl_tokens)): m = re.match("^[A-Z]+\'?[A-Z\']*$", opl_tokens[i]) if m is not None: n_tokens += 1 diff --git a/egs/librispeech/s5/local/lm/python/text_pre_process.py b/egs/librispeech/s5/local/lm/python/text_pre_process.py index 6228079b3a3..b75d0711d13 100755 --- a/egs/librispeech/s5/local/lm/python/text_pre_process.py +++ b/egs/librispeech/s5/local/lm/python/text_pre_process.py @@ -20,13 +20,13 @@ def parse_args(): parser = argparse.ArgumentParser(description="Pre-process a book's text") - parser.add_argument("--in-encoding", type=str, default="utf-8", + parser.add_argument("--in-encoding", default="utf-8", help="Encoding to use when reading the input text") - parser.add_argument("--out-encoding", type=str, default="ascii", + parser.add_argument("--out-encoding", default="ascii", help="Encoding to use when writing the output text") - parser.add_argument('--sent-end-marker', type=str, default="DOTDOTDOT") - parser.add_argument("in_text", type=str, help="Input text") - parser.add_argument("out_text", type=str, help="Output text") + parser.add_argument('--sent-end-marker', default="DOTDOTDOT") + parser.add_argument("in_text", help="Input text") + parser.add_argument("out_text", help="Output text") return parser.parse_args() # http://rosettacode.org/wiki/Roman_numerals/Decode#Python diff --git a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh index 257e497017b..137a972f3d9 100755 --- a/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh +++ b/egs/librispeech/s5/local/rnnlm/tuning/run_tdnn_lstm_1a.sh @@ -3,23 +3,23 @@ # Copyright 2012 Johns Hopkins University (author: Daniel Povey) # 2018 Ke Li -# This script trains LMs on the librispeech 960 hours training data. +# This script trains LMs on the librispeech-lm-norm.txt.gz. -# rnnlm/train_rnnlm.sh: best iteration (out of 26) was 21, linking it to final iteration. -# rnnlm/train_rnnlm.sh: train/dev perplexity was 118.4 / 152.6. -# Train objf: -5.74 -5.51 -5.38 -5.29 -5.22 -5.16 -5.12 -5.08 -5.05 -5.02 -4.99 -4.97 -4.97 -4.93 -4.90 -4.87 -4.84 -4.82 -4.79 -4.77 -4.75 -4.73 -4.71 -4.69 -4.67 -# Dev objf: -6.00 -5.61 -5.45 -5.36 -5.29 -5.24 -5.20 -5.18 -5.16 -5.13 -5.12 -5.11 -5.11 -5.09 -5.07 -5.06 -5.05 -5.04 -5.03 -5.03 -5.03 -5.03 -5.03 -5.03 -5.03 -5.03 +# rnnlm/train_rnnlm.sh: best iteration (out of 143) was 142, linking it to final iteration. +# rnnlm/train_rnnlm.sh: train/dev perplexity was 109.2 / 110.7. 
+# Train objf: -5.74 -5.54 -5.44 -5.37 -5.32 -5.28 -5.25 -5.23 -5.20 -5.18 -5.15 -5.14 -5.12 -5.10 -5.09 -5.08 -5.07 -5.05 -5.04 -5.04 -5.03 -5.02 -5.01 -5.00 -4.99 -4.99 -4.98 -4.97 -4.96 -4.96 -4.95 -4.95 -4.94 -4.93 -4.93 -4.92 -4.92 -4.92 -4.91 -4.90 -4.90 -4.89 -4.89 -4.89 -4.88 -4.88 -4.87 -4.87 -4.87 -4.86 -4.86 -4.86 -4.85 -4.85 -4.84 -4.84 -4.84 -4.84 -4.84 -4.83 -4.83 -4.83 -4.82 -4.82 -4.82 -4.82 -4.81 -4.81 -4.81 -4.81 -4.80 -4.80 -4.80 -4.79 -4.79 -4.79 -4.79 -4.78 -4.79 -4.78 -4.78 -4.78 -4.78 -4.77 -4.77 -4.77 -4.77 -4.77 -4.76 -4.76 -4.76 -4.76 -4.76 -4.75 -4.75 -4.75 -4.75 -4.75 -4.74 -4.74 -4.74 -4.74 -4.74 -4.74 -4.73 -4.74 -4.74 -4.73 -4.73 -4.73 -4.73 -4.73 -4.72 -4.73 -4.73 -4.73 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.71 -4.71 -4.71 -4.71 -4.71 -4.70 -4.70 -4.70 -4.70 -4.70 -4.69 -4.69 -4.69 -4.69 -4.69 -4.69 -4.68 -4.68 +# Dev objf: -5.99 -5.65 -5.53 -5.44 -5.38 -5.34 -5.30 -5.27 -5.22 -5.20 -5.18 -5.16 -5.14 -5.12 -5.11 -5.10 -5.09 -5.08 -5.07 -5.05 -5.04 -5.04 -5.03 -5.01 -5.00 -4.99 -4.99 -4.98 -4.97 -4.97 0.00 -4.96 -4.95 -4.95 -4.94 -4.93 -4.93 -4.92 -4.92 -4.91 -4.91 -4.90 -4.90 -4.89 -4.89 -4.89 -4.88 -4.88 -4.88 -4.87 -4.87 -4.87 -4.86 -4.86 -4.85 -4.85 -4.87 -4.84 -4.84 -4.84 -4.83 -4.91 -4.83 -4.83 -4.83 -4.82 -4.82 -4.82 -4.82 -4.81 -4.81 -4.81 -4.80 -4.80 -4.80 -4.80 -4.80 -4.79 -4.79 -4.79 -4.79 -4.79 -4.79 -4.78 -4.78 -4.79 -4.78 -4.77 -4.77 -4.77 -4.77 -4.77 -4.77 -4.77 -4.76 -4.76 -4.76 -4.76 -4.76 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.75 -4.74 -4.74 -4.74 -4.74 -4.74 -4.74 -4.74 -4.73 -4.74 -4.73 -4.73 -4.73 -4.73 -4.73 -4.73 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.72 -4.71 -4.71 -4.71 -4.71 -4.71 -4.71 -4.71 -4.71 # WER summary on dev and test sets # System tdnn_1d_sp +lattice_rescore +nbest_rescore -# WER on dev(fglarge) 3.34 2.97 2.98 -# WER on dev(tglarge) 3.44 3.02 3.07 -# WER on dev_other(fglarge) 8.70 7.98 8.00 -# WER on dev_other(tglarge) 9.25 8.28 8.35 -# WER on test(fglarge) 3.77 3.41 3.40 -# WER on test(tglarge) 3.85 3.50 3.47 -# WER on test_other(fglarge) 8.91 8.22 8.21 -# WER on test_other(tglarge) 9.31 8.55 8.49 +# WER on dev(fglarge) 3.34 2.71 2.62 +# WER on dev(tglarge) 3.44 2.75 2.66 +# WER on dev_other(fglarge) 8.70 7.37 7.55 +# WER on dev_other(tglarge) 9.25 7.56 7.73 +# WER on test(fglarge) 3.77 3.12 3.06 +# WER on test(tglarge) 3.85 3.18 3.11 +# WER on test_other(fglarge) 8.91 7.63 7.68 +# WER on test_other(tglarge) 9.31 7.83 7.95 # command to get the WERs above: # tdnn_1d_sp @@ -37,7 +37,7 @@ lstm_rpd=256 lstm_nrpd=256 stage=-10 train_stage=-10 -epochs=20 +epochs=4 # variables for lattice rescoring run_lat_rescore=true @@ -54,23 +54,25 @@ pruned_rescore=true . ./cmd.sh . ./utils/parse_options.sh -# test of 960 hours training transcriptions -text=data/train_960/text +text=data/local/lm/librispeech-lm-norm.txt.gz lexicon=data/lang_nosp/words.txt -text_dir=data/rnnlm/text_960_1a +text_dir=data/rnnlm/text mkdir -p $dir/config set -e -for f in $text $lexicon; do +for f in $lexicon; do [ ! -f $f ] && \ echo "$0: expected file $f to exist; search for run.sh in run.sh" && exit 1 done if [ $stage -le 0 ]; then mkdir -p $text_dir + if [ ! -f $text ]; then + wget http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz -P data/local/lm + fi echo -n >$text_dir/dev.txt - # hold out one in every 50 lines as dev data. 
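# Illustration, not part of the recipe: the awk one-liner in this hunk holds out
# one line in every N as dev data (N changes from 50 to 2000 here) and writes the
# rest to the training text.  A minimal Python equivalent of that split logic:
def split_heldout(lines, n=2000):
    train, dev = [], []
    for i, line in enumerate(lines, start=1):  # awk's NR is 1-based
        (dev if i % n == 0 else train).append(line)
    return train, dev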
- cat $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%50 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/librispeech.txt + # hold out one in every 2000 lines as dev data. + gunzip -c $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%2000 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/librispeech.txt fi if [ $stage -le 1 ]; then @@ -119,7 +121,7 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - rnnlm/train_rnnlm.sh --num-jobs-final 2 \ + rnnlm/train_rnnlm.sh --num-jobs-final 8 \ --stage $train_stage \ --num-epochs $epochs \ --cmd "$train_cmd" $dir diff --git a/egs/madcat_ar/v1/README.txt b/egs/madcat_ar/README.txt similarity index 100% rename from egs/madcat_ar/v1/README.txt rename to egs/madcat_ar/README.txt diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index 778555c427e..650a0704d80 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -13,6 +13,7 @@ be vertically or horizontally aligned). Hence to extract line image from line bounding box, page image is rotated and line image is cropped and saved. """ +from __future__ import division import sys import argparse @@ -87,8 +88,8 @@ def unit_vector(pt0, pt1): (float, float): unit vector """ dis_0_to_1 = sqrt((pt0[0] - pt1[0])**2 + (pt0[1] - pt1[1])**2) - return (pt1[0] - pt0[0]) / dis_0_to_1, \ - (pt1[1] - pt0[1]) / dis_0_to_1 + return (pt1[0] - pt0[0])/ dis_0_to_1, \ + (pt1[1] - pt0[1])/ dis_0_to_1 def orthogonal_vector(vector): @@ -130,7 +131,7 @@ def bounding_area(index, hull): return {'area': len_p * len_o, 'length_parallel': len_p, 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'rectangle_center': (min_p + float(len_p)/ 2, min_o + float(len_o)/ 2), 'unit_vector': unit_vector_p, } @@ -143,7 +144,7 @@ def to_xy_coordinates(unit_vector_angle, point): ------ (float, float): converted x,y coordinate of the unit vector. """ - angle_orthogonal = unit_vector_angle + pi / 2 + angle_orthogonal = unit_vector_angle + pi/ 2 return point[0] * cos(unit_vector_angle) + point[1] * cos(angle_orthogonal), \ point[0] * sin(unit_vector_angle) + point[1] * sin(angle_orthogonal) @@ -235,8 +236,8 @@ def get_center(im): ------- (int, int): center of the image """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 + center_x = float(im.size[0])/ 2 + center_y = float(im.size[1])/ 2 return int(center_x), int(center_y) @@ -248,9 +249,9 @@ def get_horizontal_angle(unit_vector_angle): (float): updated angle of the unit vector to be in radians. It is only in first or fourth quadrant. 
""" - if unit_vector_angle > pi / 2 and unit_vector_angle <= pi: + if unit_vector_angle > pi/ 2 and unit_vector_angle <= pi: unit_vector_angle = unit_vector_angle - pi - elif unit_vector_angle > -pi and unit_vector_angle < -pi / 2: + elif unit_vector_angle > -pi and unit_vector_angle < -pi/ 2: unit_vector_angle = unit_vector_angle + pi return unit_vector_angle @@ -354,7 +355,7 @@ def dilate_polygon(points, amount_increase): bisect = np.divide(bisect, np.linalg.norm(bisect)) cos_theta = np.dot(next_normal, bisect) - hyp = amount_increase / cos_theta + hyp = float(amount_increase)/ cos_theta new_point = np.around(point + hyp * bisect) new_point = new_point.astype(int) diff --git a/egs/madcat_ar/v1/local/download_data.sh b/egs/madcat_ar/v1/local/download_data.sh deleted file mode 100755 index 7061be49c2a..00000000000 --- a/egs/madcat_ar/v1/local/download_data.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# Copyright 2018 Ashish Arora -# Apache 2.0 - -# This script downloads data splits for MADCAT Arabic dataset. -# It also check if madcat arabic data is present or not. - -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -train_split_url=http://www.openslr.org/resources/48/madcat.train.raw.lineid -test_split_url=http://www.openslr.org/resources/48/madcat.test.raw.lineid -dev_split_url=http://www.openslr.org/resources/48/madcat.dev.raw.lineid -data_splits=data/download/data_splits - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh || exit 1; - -if [ -d $data_splits ]; then - echo "$0: Not downloading the data splits as it is already there." -else - if [ ! -f $data_splits/madcat.train.raw.lineid ]; then - mkdir -p $data_splits - echo "$0: Downloading the data splits..." - wget -P $data_splits $train_split_url || exit 1; - wget -P $data_splits $test_split_url || exit 1; - wget -P $data_splits $dev_split_url || exit 1; - fi - echo "$0: Done downloading the data splits" -fi - -if [ -d $download_dir1 ]; then - echo "$0: madcat arabic data directory is present." -else - if [ ! -f $download_dir1/madcat/*.madcat.xml ]; then - echo "$0: please download madcat data..." - fi -fi diff --git a/egs/madcat_ar/v1/local/prepare_data.sh b/egs/madcat_ar/v1/local/prepare_data.sh new file mode 100755 index 00000000000..1049db9826d --- /dev/null +++ b/egs/madcat_ar/v1/local/prepare_data.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 + +# This script downloads the data splits for MADCAT Arabic dataset and prepares the training +# validation, and test data (i.e text, images.scp, utt2spk and spk2utt) by calling process_data.py. +# It also uses Arabic Gigaword text corpus for language modeling. + +# Eg. local/prepare_data.sh +# Eg. 
text file: LDC0001_000399_NHR_ARB_20070113.0052_11_LDC0001_0z11 +# وهناك تداخل بين الرأسمالية الإسرائيلية +# utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 +# images.scp file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 +# data/local/train/1/NHR_ARB_20070113.0052_11_LDC0001_00z1.png + +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +train_split_url=http://www.openslr.org/resources/48/madcat.train.raw.lineid +test_split_url=http://www.openslr.org/resources/48/madcat.test.raw.lineid +dev_split_url=http://www.openslr.org/resources/48/madcat.dev.raw.lineid +data_splits=data/download/data_splits +stage=0 +download_dir=data/download +gigacorpus=data/local/gigawordcorpus +gigaword_loc=/export/corpora5/LDC/LDC2011T11 +use_extra_corpus_text=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +if [ -d $data_splits ]; then + echo "$0: Not downloading the data splits as it is already there." +else + if [ ! -f $data_splits/madcat.train.raw.lineid ]; then + mkdir -p $data_splits + echo "$0: Downloading the data splits..." + wget -P $data_splits $train_split_url || exit 1; + wget -P $data_splits $test_split_url || exit 1; + wget -P $data_splits $dev_split_url || exit 1; + fi + echo "$0: Done downloading the data splits" +fi + +if [ -d $download_dir1 ]; then + echo "$0: madcat arabic data directory is present." +else + if [ ! -f $download_dir1/madcat/*.madcat.xml ]; then + echo "$0: please download madcat data..." + fi +fi + +mkdir -p $download_dir data/local +if $use_extra_corpus_text; then + mkdir -p $gigacorpus + cp -r $gigaword_loc/. $gigacorpus + for newswire in aaw_arb afp_arb ahr_arb asb_arb hyt_arb nhr_arb qds_arb umh_arb xin_arb; do + for file in $gigacorpus/arb_gw_5/data/$newswire/*.gz; do + gzip -d $file + done + for file in $gigacorpus/arb_gw_5/data/$newswire/*; do + sed -e '/^<[^>]*>$/d; s/``/"/g; s/\x27\x27/"/g' $file >> $gigacorpus/arb_gw_5/data/${newswire}_combined.txt + done + done +fi diff --git a/egs/madcat_ar/v1/local/process_data.py b/egs/madcat_ar/v1/local/process_data.py index e476b67cb96..71f7f39d632 100755 --- a/egs/madcat_ar/v1/local/process_data.py +++ b/egs/madcat_ar/v1/local/process_data.py @@ -24,23 +24,23 @@ " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " " data/train data/local/lines ", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('database_path1', type=str, +parser.add_argument('database_path1', help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('database_path2', type=str, +parser.add_argument('database_path2', help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('database_path3', type=str, +parser.add_argument('database_path3', help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('data_splits', type=str, +parser.add_argument('data_splits', help='Path to file that contains the train/test/dev split information') -parser.add_argument('out_dir', type=str, +parser.add_argument('out_dir', help='directory location to write output files.') -parser.add_argument('images_scp_path', type=str, +parser.add_argument('images_scp_path', help='Path of input images.scp file(maps line image and location)') -parser.add_argument('writing_condition1', type=str, +parser.add_argument('writing_condition1', help='Path to the downloaded (and extracted) writing conditions file 1') 
-parser.add_argument('writing_condition2', type=str, +parser.add_argument('writing_condition2', help='Path to the downloaded (and extracted) writing conditions file 2') -parser.add_argument('writing_condition3', type=str, +parser.add_argument('writing_condition3', help='Path to the downloaded (and extracted) writing conditions file 3') parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, help="performs image augmentation") @@ -192,25 +192,25 @@ def get_line_image_location(): if args.augment: key = (line_id + '.')[:-1] for i in range(0, 3): - location_id = '_' + line_id + '_scale' + str(i) + location_id = "_{}_scale{}".format(line_id, i) line_image_file_name = base_name + location_id + '.png' location = image_loc_dict[line_image_file_name] image_file_path = os.path.join(location, line_image_file_name) line = text_line_word_dict[key] text = ' '.join(line) base_line_image_file_name = line_image_file_name.split('.png')[0] - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_line_image_file_name + utt_id = "{}_{}_{}".format(writer_id, str(image_num).zfill(6), base_line_image_file_name) text_fh.write(utt_id + ' ' + text + '\n') utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') image_fh.write(utt_id + ' ' + image_file_path + '\n') image_num += 1 else: - updated_base_name = base_name + '_' + str(line_id).zfill(4) +'.png' + updated_base_name = "{}_{}.png".format(base_name, str(line_id).zfill(4)) location = image_loc_dict[updated_base_name] image_file_path = os.path.join(location, updated_base_name) line = text_line_word_dict[line_id] text = ' '.join(line) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(line_id).zfill(4) + utt_id = "{}_{}_{}_{}".format(writer_id, str(image_num).zfill(6), base_line_image_file_name, str(line_id).zfill(4)) text_fh.write(utt_id + ' ' + text + '\n') utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') image_fh.write(utt_id + ' ' + image_file_path + '\n') diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index de67e444f39..bb2b4f86db1 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -19,6 +19,7 @@ images_scp_dir=data/local overwrite=false subset=false augment=false +use_extra_corpus_text=true . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -35,9 +36,9 @@ if [ $stage -le 0 ]; then echo "Exiting with status 1 to avoid data corruption" exit 1; fi - echo "$0: Downloading data splits...$(date)" - local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ - --download_dir2 $download_dir2 --download_dir3 $download_dir3 + local/prepare_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ + --download_dir2 $download_dir2 --download_dir3 $download_dir3 \ + --use_extra_corpus_text $use_extra_corpus_text for set in test train dev; do data_split_file=$data_splits_dir/madcat.$set.raw.lineid @@ -48,7 +49,7 @@ if [ $stage -le 0 ]; then --data data/local/$set --subset $subset --augment $augment || exit 1 done - echo "$0: Preparing data..." + echo "$0: Processing data..." 
for set in dev train test; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ diff --git a/egs/madcat_zh/v1/local/create_line_image_from_page_image.py b/egs/madcat_zh/v1/local/create_line_image_from_page_image.py index be0afe6d9fc..22af571fc04 100755 --- a/egs/madcat_zh/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_zh/v1/local/create_line_image_from_page_image.py @@ -76,8 +76,8 @@ def unit_vector(pt0, pt1): Eg. 0.31622776601683794, 0.9486832980505138 """ dis_0_to_1 = sqrt((pt0[0] - pt1[0])**2 + (pt0[1] - pt1[1])**2) - return (pt1[0] - pt0[0]) / dis_0_to_1, \ - (pt1[1] - pt0[1]) / dis_0_to_1 + return (pt1[0] - pt0[0])/ dis_0_to_1, \ + (pt1[1] - pt0[1])/ dis_0_to_1 def orthogonal_vector(vector): @@ -124,7 +124,7 @@ def bounding_area(index, hull): return {'area': len_p * len_o, 'length_parallel': len_p, 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'rectangle_center': (min_p + float(len_p)/ 2, min_o + float(len_o)/ 2), 'unit_vector': unit_vector_p, } @@ -140,7 +140,7 @@ def to_xy_coordinates(unit_vector_angle, point): (float, float): converted x,y coordinate of the unit vector. Eg. 0.680742447866183, 2.1299271629971663 """ - angle_orthogonal = unit_vector_angle + pi / 2 + angle_orthogonal = unit_vector_angle + pi/ 2 return point[0] * cos(unit_vector_angle) + point[1] * cos(angle_orthogonal), \ point[0] * sin(unit_vector_angle) + point[1] * sin(angle_orthogonal) @@ -246,8 +246,8 @@ def get_center(im): (int, int): center of the image Eg. 2550, 3300 """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 + center_x = float(im.size[0])/ 2 + center_y = float(im.size[1])/ 2 return int(center_x), int(center_y) @@ -262,9 +262,9 @@ def get_horizontal_angle(unit_vector_angle): Eg. 0.01543. 
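# Illustration, not part of the patch: the float() casts and the
# 'from __future__ import division' imports added in these hunks guard against
# Python 2 truncating integer division, which would shift the computed image
# centers and rectangle centers by up to half a pixel:
print(5 / 2)         # 2.5 under Python 3 (or with the __future__ import); 2 under plain Python 2
print(float(5) / 2)  # 2.5 under both, matching the intent of get_center() above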
""" - if unit_vector_angle > pi / 2 and unit_vector_angle <= pi: + if unit_vector_angle > pi/ 2 and unit_vector_angle <= pi: unit_vector_angle = unit_vector_angle - pi - elif unit_vector_angle > -pi and unit_vector_angle < -pi / 2: + elif unit_vector_angle > -pi and unit_vector_angle < -pi/ 2: unit_vector_angle = unit_vector_angle + pi return unit_vector_angle diff --git a/egs/madcat_zh/v1/local/process_data.py b/egs/madcat_zh/v1/local/process_data.py index dbee815953a..994a4486420 100755 --- a/egs/madcat_zh/v1/local/process_data.py +++ b/egs/madcat_zh/v1/local/process_data.py @@ -23,11 +23,11 @@ " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " " data/train data/local/lines ", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('database_path1', type=str, +parser.add_argument('database_path1', help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('data_splits', type=str, +parser.add_argument('data_splits', help='Path to file that contains the train/test/dev split information') -parser.add_argument('out_dir', type=str, +parser.add_argument('out_dir', help='directory location to write output files.') args = parser.parse_args() @@ -185,12 +185,12 @@ def get_line_image_location(): base_name = os.path.basename(image_file_path) base_name, b = base_name.split('.tif') for lineID in sorted(text_line_word_dict): - updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' + updated_base_name = "{}_{}.png".format(base_name, str(lineID).zfill(4)) location = image_loc_dict[updated_base_name] image_file_path = os.path.join(location, updated_base_name) line = text_line_word_dict[lineID] text = ' '.join(''.join(line)) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) + utt_id = "{}_{}_{}_{}".format(writer_id, str(image_num).zfill(6), base_name, str(lineID).zfill(4)) text_fh.write(utt_id + ' ' + text + '\n') utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') image_fh.write(utt_id + ' ' + image_file_path + '\n') diff --git a/egs/mini_librispeech/s5/local/grammar/extend_vocab_demo.sh b/egs/mini_librispeech/s5/local/grammar/extend_vocab_demo.sh index 382f9f4f6c6..1ec4a0d575b 100755 --- a/egs/mini_librispeech/s5/local/grammar/extend_vocab_demo.sh +++ b/egs/mini_librispeech/s5/local/grammar/extend_vocab_demo.sh @@ -96,7 +96,7 @@ if [ $stage -le 4 ]; then if $run_g2p; then steps/dict/apply_g2p.sh $tree_dir/extvocab_nosp_lexicon/words $tree_dir/extvocab_nosp_g2p $tree_dir/extvocab_nosp_lexicon else - cat <$tree_dir/extvocab_nosp_lexicon//lexicon.lex + cat <$tree_dir/extvocab_nosp_lexicon/lexicon.lex HARDWIGG 0.962436 HH AA1 R D W IH1 G SUDVESTR 0.162048 S AH1 D V EY1 S T R SUDVESTR 0.133349 S AH1 D V EH1 S T R diff --git a/egs/multi_en/s5/local/format_acronyms_ctm_eval2000.py b/egs/multi_en/s5/local/format_acronyms_ctm_eval2000.py index 3c447c5976a..75cc4458d85 100755 --- a/egs/multi_en/s5/local/format_acronyms_ctm_eval2000.py +++ b/egs/multi_en/s5/local/format_acronyms_ctm_eval2000.py @@ -10,6 +10,7 @@ # en_4156 B 414.58 0.16 l # en_4156 B 414.74 0.17 a +from __future__ import division import argparse,re __author__ = 'Minhua Wu' @@ -27,7 +28,7 @@ if items[4].find(".") != -1: letters = items[4].split("._") acronym_period = round(float(items[3]), 2) - letter_slot = round(acronym_period / len(letters), 2) + letter_slot = round(acronym_period/len(letters), 2) time_start = round(float(items[2]), 2) for l in letters[:-1]: time = " %.2f %.2f " % (time_start, letter_slot) diff --git 
a/egs/multi_en/s5/local/format_acronyms_ctm_rt03.py b/egs/multi_en/s5/local/format_acronyms_ctm_rt03.py index 59814beb4ea..8438bbdaf81 100755 --- a/egs/multi_en/s5/local/format_acronyms_ctm_rt03.py +++ b/egs/multi_en/s5/local/format_acronyms_ctm_rt03.py @@ -10,6 +10,7 @@ # en_4156 B 414.58 0.16 l # en_4156 B 414.74 0.17 a +from __future__ import division import argparse,re __author__ = 'Minhua Wu' @@ -27,7 +28,7 @@ if items[4].find(".") != -1: letters = items[4].split("._") acronym_period = round(float(items[3]), 2) - letter_slot = round(acronym_period / len(letters), 2) + letter_slot = round(acronym_period/len(letters), 2) time_start = round(float(items[2]), 2) for l in letters[:-1]: time = " %.2f %.2f " % (time_start, letter_slot) diff --git a/egs/multi_en/s5/local/normalize_transcript.py b/egs/multi_en/s5/local/normalize_transcript.py index 4572f4d658d..c640723a885 100755 --- a/egs/multi_en/s5/local/normalize_transcript.py +++ b/egs/multi_en/s5/local/normalize_transcript.py @@ -7,6 +7,7 @@ # This script normalizes the given "text" (transcript) file. The normalized result # is printed to STDOUT. This normalization should be applied to all corpora. +from __future__ import print_function import re import sys @@ -26,7 +27,7 @@ def normalize(utt): def main(): if len(sys.argv) != 2: - print 'Usage: local/normalize_transcript.py [text_file]' + print('Usage: local/normalize_transcript.py [text_file]') sys.exit(1) with open(sys.argv[1], 'r') as f: for line in f.readlines(): diff --git a/egs/multi_en/s5/local/tedlium_join_suffix.py b/egs/multi_en/s5/local/tedlium_join_suffix.py index c85e8f364f6..47db4ce0b05 100755 --- a/egs/multi_en/s5/local/tedlium_join_suffix.py +++ b/egs/multi_en/s5/local/tedlium_join_suffix.py @@ -12,6 +12,7 @@ # Apache 2.0 +from __future__ import print_function import sys from codecs import open diff --git a/egs/rimes/README.txt b/egs/rimes/README.txt new file mode 100644 index 00000000000..d201c5fec4e --- /dev/null +++ b/egs/rimes/README.txt @@ -0,0 +1,13 @@ +Rimes is a French handwriting recognition database created by A2iA. +The database was created by asking individuals to write letters on a given scenario like +a change of personal information, payment difficulty, damage declaration. The +dataset has been used in several international research including ICFHR 2008, +ICDAR-2009, ICDAR-2011 competitions for isolated word level and +line level recognition tasks. + +It contains 11333 training lines and 788 test lines. It does not include +a validation split but in a recent publication a 10% sampling of the total +training lines for validation purposes were performed +(http://www.jpuigcerver.net/pubs/jpuigcerver_icdar2017.pdf). +We have used a similar train, test and validation split. +More info: http://www.a2ialab.com/doku.php?id=rimes_database:start diff --git a/egs/rimes/v1/cmd.sh b/egs/rimes/v1/cmd.sh new file mode 100755 index 00000000000..6080a8bab68 --- /dev/null +++ b/egs/rimes/v1/cmd.sh @@ -0,0 +1,13 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. 
Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export cmd="retry.pl queue.pl" diff --git a/egs/rimes/v1/image b/egs/rimes/v1/image new file mode 120000 index 00000000000..1668ee99922 --- /dev/null +++ b/egs/rimes/v1/image @@ -0,0 +1 @@ +../../cifar/v1/image/ \ No newline at end of file diff --git a/egs/rimes/v1/local/chain/compare_wer.sh b/egs/rimes/v1/local/chain/compare_wer.sh new file mode 100755 index 00000000000..4a2cc29481c --- /dev/null +++ b/egs/rimes/v1/local/chain/compare_wer.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b} + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/cnn{1a,1b}" + exit 1 +fi +. ./path.sh + +echo "# $0 $*" +used_epochs=false + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +echo -n "# WER " +for x in $*; do + wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER " +for x in $*; do + cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# WER val " +for x in $*; do + wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER val " +for x in $*; do + cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
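# Illustration, not part of the script: the WER/CER loops above take field 2 of
# scoring_kaldi/best_wer, whose single line typically looks like
# "%WER 7.75 [ 437 / 5639, 62 ins, 55 del, 320 sub ] exp/chain/.../wer_7_1.0".
# A rough Python equivalent of the awk '{print $2}' extraction (path is an example):
with open('exp/chain/cnn_e2eali_1a/decode_test/scoring_kaldi/best_wer') as f:
    wer = f.readline().split()[1]  # -> '7.75'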
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent) " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Parameters " +for x in $*; do + params=$(nnet3-info $x/final.mdl 2>/dev/null | grep num-parameters | cut -d' ' -f2 | awk '{printf "%0.2fM\n",$1/1000000}') + printf "% 10s" $params +done +echo diff --git a/egs/rimes/v1/local/chain/run_cnn_e2eali.sh b/egs/rimes/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..e2545b0186e --- /dev/null +++ b/egs/rimes/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1a.sh \ No newline at end of file diff --git a/egs/rimes/v1/local/chain/run_e2e_cnn.sh b/egs/rimes/v1/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..d26ba0182ce --- /dev/null +++ b/egs/rimes/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh new file mode 100755 index 00000000000..4eb3e5e1e76 --- /dev/null +++ b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -0,0 +1,257 @@ +#!/bin/bash + +# e2eali_1a is a 6 cnn layer 3 tdnn layer model with dropout, l2-regularization, batch-normalization + +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +# System cnn_e2eali_1a +# WER 7.75 +# CER 2.68 +# Final train prob -0.0779 +# Final valid prob -0.0860 +# Final train prob (xent) -0.7744 +# Final valid prob (xent) -0.8111 +# Parameters 4.96M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a +# exp/chain/cnn_e2eali_1a: num-iters=36 nj=3..8 num-params=5.0M dim=40->944 combine=-0.076->-0.076 (over 1) xent:train/valid[23,35,final]=(-1.48,-0.871,-0.774/-1.46,-0.888,-0.811) logprob:train/valid[23,35,final]=(-0.208,-0.102,-0.078/-0.189,-0.104,-0.086) + +# line level scoring result +# WER 7.75 [ 437 / 5639, 62 ins, 55 del, 320 sub ] exp/chain/cnn_e2eali_1d/decode_test/wer_7_1.0 +# paragraph scoring result +# WER 6.69 [ 377 / 5639, 44 ins, 37 del, 296 sub ] exp/chain/cnn_e2eali_1a/decode_test/para/wer_7_1.0 + +set -e -o pipefail + +stage=0 + +nj=50 +train_set=train +decode_val=true +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1a +tree_affix=_1a +bnf_chain_model_dir=exp/chain/e2e_cnn_1a +bnf_layer_name=tdnn6.affine +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=1000 +# we don't need extra left/right context for TDNN systems. +tdnn_dim=550 +# training options +srand=0 +remove_egs=true +lang_decode=data/lang +if $decode_val; then maybe_val=val; else maybe_val= ; fi +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. 
+echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 --generate-ali-from-lats true \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +bnf_data_dir=$bnf_chain_model_dir/$(basename $train_data_dir) +if [ $stage -le 3 ]; then + if [ -f $bnf_data_dir/feats.scp ]; then + echo "$0: $bnf_data_dir/feats.scp exists. Refusing to dump features!" + exit 1 + fi + + steps/nnet3/make_bottleneck_features.sh --cmd "$cmd" --use-gpu true \ + --compress false --nj $nj \ + $bnf_layer_name ${train_data_dir} ${bnf_data_dir} $bnf_chain_model_dir || exit 1 +fi + +if [ $stage -le 4 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 4 \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${bnf_data_dir} \ + $lang $lat_dir $tree_dir +fi + + +if [ $stage -le 5 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output 
include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 6 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=4 \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=10 \ + --trainer.frames-per-iter=2000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=8 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 7 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 8 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done +fi + +echo "Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh new file mode 100755 index 00000000000..9d28a41316d --- /dev/null +++ b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# System e2e_cnn_1d +# WER 10.07 +# CER 3.95 +# Final train prob 0.0369 +# Final valid prob -0.0129 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 12.73M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=20 nj=2..4 num-params=12.7M dim=40->19404 combine=0.079->0.079 (over 3) logprob:train/valid[12,19,final]=(0.017,0.034,0.037/-0.024,-0.013,-0.013) + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +affix=1a +nj=50 + +# training options +tdnn_dim=450 +minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 +common_egs_dir= +train_set=train +decode_val=true +lang_decode=data/lang +if $decode_val; then maybe_val=val; else maybe_val= ; fi +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + --shared-phones true \ + --type biphone \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn6 input=Append(-4,0,4) dim=200 + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
+ steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs 3 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/rimes/v1/local/combine_line_txt_to_paragraph.py b/egs/rimes/v1/local/combine_line_txt_to_paragraph.py new file mode 100755 index 00000000000..5a794506b47 --- /dev/null +++ b/egs/rimes/v1/local/combine_line_txt_to_paragraph.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +""" This script creates paragraph level text file. It reads + the line level text file and combines them to get + paragraph level file. + Eg. local/combine_line_txt_to_paragraph.py + Eg. 
Input: writer000000_eval2011-0_000001 Comme indiqué dans + writer000000_eval2011-0_000002 habitation n° DVT 36 + writer000000_eval2011-0_000003 de mon domicile + Output: writer000000_eval2011-0 Comme indiqué dans habitation n° DVT 36 de mon domicile +""" + +import argparse +import os +import io +import sys +### main ### +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + +paragraph_txt_dict = dict() +for line in infile: + line_vect = line.strip().split(' ') + line_id = int(line_vect[0].split('_')[-1]) + paragraph_id = line_vect[0].split('-')[-1] + paragraph_id = int(paragraph_id.split('_')[0]) + line_text = " ".join(line_vect[1:]) + if paragraph_id not in paragraph_txt_dict.keys(): + paragraph_txt_dict[paragraph_id] = dict() + paragraph_txt_dict[paragraph_id][line_id] = line_text + + +para_txt_dict = dict() +for para_id in sorted(paragraph_txt_dict.keys()): + para_txt = "" + for line_id in sorted(paragraph_txt_dict[para_id]): + text = paragraph_txt_dict[para_id][line_id] + para_txt = para_txt + " " + text + para_txt_dict[para_id] = para_txt + utt_id = 'writer' + str(para_id).zfill(6) + '_' + 'eval2011-' + str(para_id) + output.write(utt_id + ' ' + para_txt + '\n') diff --git a/egs/rimes/v1/local/extract_features.sh b/egs/rimes/v1/local/extract_features.sh new file mode 100755 index 00000000000..ec3bc8a268c --- /dev/null +++ b/egs/rimes/v1/local/extract_features.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment_type=no_aug +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + image/ocr/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/rimes/v1/local/prepare_data.sh b/egs/rimes/v1/local/prepare_data.sh new file mode 100755 index 00000000000..502718e7777 --- /dev/null +++ b/egs/rimes/v1/local/prepare_data.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# This script creates traing and validations splits, downloads text corpus for language modeling, +# prepares the training, validation and test data for rimes dataset +# (i.e text, images.scp, utt2spk and spk2utt). It calls process_data.py. + +# Eg. local/prepare_data.sh +# Eg. 
text file: writer000150_train2011-150_000001 J'ai perdu mon emploi depuis 3 mois et je me +# utt2spk file: writer000150_train2011-150_000001 writer000150 +# images.scp file: writer000150_train2011-150_000001 data/local/rimes_data/line_image/train/train2011-150_000001.png + +stage=0 +download_dir=data/local/rimes_data +data_dir=data/local/rimes_data +page_image=$data_dir/page_image +xml=$data_dir/xml +train_img_url="http://www.a2ialab.com/lib/exe/fetch.php?media=rimes_database:data:icdar2011:line:training_2011.tar"; +train_xml_url="http://www.a2ialab.com/lib/exe/fetch.php?media=rimes_database:data:icdar2011:line:training_2011.xml"; +test_xml_url="http://www.a2ialab.com/lib/exe/fetch.php?media=rimes_database:data:icdar2011:line:eval_2011_annotated.xml"; +test_img_url="http://www.a2ialab.com/lib/exe/fetch.php?media=rimes_database:data:icdar2011:line:eval_2011.tar"; +text_url="http://opus.nlpl.eu/download.php?f=OfisPublik.tar.gz" +use_extra_corpus_text=true +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +mkdir -p data/{train,test,val} + +if [ -d $page_image ]; then + echo "$0: Not downloading data as it is already there." +else + mkdir -p $data_dir/{page_image,xml,line_image}/{train_total,test,val,train} + tar -xf $download_dir/training_2011.tar -C $page_image/train_total || exit 1; + tar -xf $download_dir/eval_2011.tar -C $page_image/test || exit 1; + cp -r $download_dir/training_2011.xml $xml/train_total/rimes_2011.xml + cp -r $download_dir/eval_2011_annotated.xml $xml/test/rimes_2011.xml + echo "$0: Done downloading and extracting data" + + #First 150 training page images are used for validation + cat $xml/train_total/rimes_2011.xml | head -n451 > $xml/val/rimes_2011.xml + cat $xml/train_total/rimes_2011.xml | tail -1 >> $xml/val/rimes_2011.xml + cp -r $page_image/train_total/* $page_image/train + + #Remaining training page images are used for training + cat $xml/train_total/rimes_2011.xml | head -1 > $xml/train/rimes_2011.xml + cat $xml/train_total/rimes_2011.xml | tail -n+452 >> $xml/train/rimes_2011.xml + cp -r $page_image/train_total/* $page_image/val +fi + +if $use_extra_corpus_text; then + # using freely available french text corpus for language modeling + mkdir -p data/local/text_data + wget -P data/local/text_data $text_url || exit 1; + tar -xf data/local/text_data/download.php?f=OfisPublik.tar.gz -C data/local/text_data || exit 1; + zcat data/local/text_data/OfisPublik/raw/fr/*.gz > data/local/text_data/fr_text +fi + +if [ $stage -le 0 ]; then + echo "$0: Processing train, val and test data... $(date)." + local/process_data.py $data_dir train --augment true || exit 1 + local/process_data.py $data_dir val || exit 1 + local/process_data.py $data_dir test || exit 1 + for dataset in test train val; do + echo "$0: Fixing data directory for dataset: $dataset $(date)." + image/fix_data_dir.sh data/$dataset + done +fi diff --git a/egs/rimes/v1/local/prepare_dict.sh b/egs/rimes/v1/local/prepare_dict.sh new file mode 100755 index 00000000000..d8093658c30 --- /dev/null +++ b/egs/rimes/v1/local/prepare_dict.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# Copyright 2017 Hossein Hadian +# 2017 Chun Chieh Chang +# 2017 Ashish Arora + +# This script prepares the dictionary. 
+ +set -e +dir=data/local/dict +mkdir -p $dir + +local/prepare_lexicon.py $dir + +cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; + +echo ' SIL' >> $dir/lexicon.txt + +echo SIL > $dir/silence_phones.txt + +echo SIL >$dir/optional_silence.txt + +echo -n "" >$dir/extra_questions.txt diff --git a/egs/rimes/v1/local/prepare_lexicon.py b/egs/rimes/v1/local/prepare_lexicon.py new file mode 100755 index 00000000000..5a6ac5b6dbf --- /dev/null +++ b/egs/rimes/v1/local/prepare_lexicon.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora + +import argparse +import os + +parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""") +parser.add_argument('dir', type=str, help='output path') +args = parser.parse_args() + +### main ### +lex = {} +text_path = os.path.join('data', 'train', 'text') +text_fh = open(text_path, 'r', encoding='utf-8') + +with open(text_path, 'r', encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + for i in range(1, len(line_vect)): + characters = list(line_vect[i]) + # Put SIL instead of "|". Because every "|" in the beginning of the words is for initial-space of that word + characters = " ".join(['SIL' if char == '|' else char for char in characters]) + lex[line_vect[i]] = characters + if line_vect[i] == '#': + lex[line_vect[i]] = "" + +with open(os.path.join(args.dir, 'lexicon.txt'), 'w', encoding='utf-8') as fp: + for key in sorted(lex): + fp.write(key + " " + lex[key] + "\n") diff --git a/egs/rimes/v1/local/process_data.py b/egs/rimes/v1/local/process_data.py new file mode 100755 index 00000000000..b87d9fbc5e2 --- /dev/null +++ b/egs/rimes/v1/local/process_data.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +""" This script reads xml file and creates the following files :text, utt2spk, images.scp. + It also creates line images from page image and stores it into + data/local/rimes_data/train/lines. + Eg. local/process_data.py data/local/rimes_data/train train + Eg. text file: writer000000_train2011-0_000001 Je vous adresse ce courrier afin + utt2spk file: writer000000_train2011-0_000001 writer000000 + images.scp file: writer000000_train2011-0_000001 \ + data/local/rimes_data/train/lines/train2011-0_000001.png +""" + +import argparse +import xml.dom.minidom as minidom +from PIL import Image +import os +import random +parser = argparse.ArgumentParser(description="""Creates line images from page image.""") +parser.add_argument('database_path', type=str, + help='Path to the downloaded (and extracted) mdacat data') +parser.add_argument('dataset', type=str, + help='Subset of data to process.') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +parser.add_argument('--pixel-scaling', type=int, default=20, + help='padding across horizontal/verticle direction') +args = parser.parse_args() + +def expand_aabb(left, right, top, bottom, delta_pixel): + """ Increases size of axis aligned bounding box (aabb). + """ + left = left - delta_pixel + right = right + delta_pixel + top = top - delta_pixel + bottom = bottom + delta_pixel + return left, right, top, bottom + +def get_line_images_from_page_image(file_name, left, right, top, bottom, line_id): + """ Given a page image, extracts the line images from it. + Input + ----- + file_name (string): name of the page image. + left, right, top, bottom (int): coordinates corresponding to the line image. 
+ line_id (int): line number on the page image. + """ + page_image_path = os.path.join(page_image_folder, file_name) + im = Image.open(page_image_path) + box = (left, top, right, bottom) + region = im.crop(box) + base_name = os.path.splitext(os.path.basename(file_name))[0] + line_image_file_name = base_name + '_' + str(line_id).zfill(6) + '.png' + imgray = region.convert('L') + line_image_path = os.path.join(args.database_path, 'line_image', args.dataset, line_image_file_name) + imgray.save(line_image_path) + return base_name, line_image_path + +def write_kaldi_process_data_files(base_name, line_id, text): + """creates files requires for dictionary and feats.scp. + Input + ----- + image_path (string): name of the page image. + line_id (str): line number on the page image. + text: transcription of the line image. + base_name (string): + """ + writer_id = str(base_name.split('-')[1]) + writer_id = str(writer_id).zfill(6) + writer_id = 'writer' + writer_id + utt_id = writer_id + '_' + base_name + '_' + str(line_id).zfill(6) + line_image_file_name = base_name + '_' + str(line_id).zfill(6) + '.png' + image_path = os.path.join(args.database_path, 'line_image', args.dataset, line_image_file_name) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_path + '\n') + +### main ### +text_file = os.path.join('data', args.dataset, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join('data', args.dataset, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join('data', args.dataset, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +xml_path = os.path.join(args.database_path, 'xml', args.dataset) + '/rimes_2011.xml' +page_image_folder = os.path.join(args.database_path, 'page_image', args.dataset) +doc = minidom.parse(xml_path) +single_page = doc.getElementsByTagName('SinglePage') +for page in single_page: + file_name = page.getAttribute('FileName') + line = page.getElementsByTagName('Line') + id = 0 + for node in line: + id += 1 + bottom = int(node.getAttribute('Bottom')) + left = int(node.getAttribute('Left')) + right = int(node.getAttribute('Right')) + top = int(node.getAttribute('Top')) + text = node.getAttribute('Value') + text_vect = text.split() # this is to avoid non-utf-8 spaces + text = " ".join(text_vect) + if args.augment: + base_name, image_path = get_line_images_from_page_image(file_name, left, right, top, bottom, str(id)) + write_kaldi_process_data_files(base_name, str(id), text) + additional_pixel = random.randint(1, args.pixel_scaling) + left, right, top, bottom = expand_aabb(left, right, top, bottom, args.pixel_scaling + additional_pixel + 1) + line_id = str(id) + '_scale' + str(2) + base_name, image_path = get_line_images_from_page_image(file_name, left, right, top, bottom, line_id) + write_kaldi_process_data_files(base_name, line_id, text) + else: + base_name, image_path = get_line_images_from_page_image(file_name, left, right, top, bottom, str(id)) + write_kaldi_process_data_files(base_name, str(id), text) diff --git a/egs/rimes/v1/local/score.sh b/egs/rimes/v1/local/score.sh new file mode 100755 index 00000000000..0cfbda9b556 --- /dev/null +++ b/egs/rimes/v1/local/score.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -e +cmd=run.pl +stage=0 +decode_mbr=false +stats=true +beam=6 +word_ins_penalty=0.0,0.5,1.0 +min_lmwt=7 +max_lmwt=17 +iter=final + +echo "$0 $@" # Print the command line for logging +[ -f ./path.sh ] && . 
./path.sh +. parse_options.sh || exit 1; + +decode_dir=$3 +steps/scoring/score_kaldi_wer.sh --word_ins_penalty $word_ins_penalty \ + --min_lmwt $min_lmwt --max_lmwt $max_lmwt "$@" + +steps/scoring/score_kaldi_cer.sh --word_ins_penalty $word_ins_penalty \ + --min_lmwt $min_lmwt --max_lmwt $max_lmwt --stage 2 "$@" + +local/score_paragraph.sh --word_ins_penalty $word_ins_penalty \ + --min_lmwt $min_lmwt --max_lmwt $max_lmwt $decode_dir diff --git a/egs/rimes/v1/local/score_paragraph.sh b/egs/rimes/v1/local/score_paragraph.sh new file mode 100755 index 00000000000..c6ef4da1d5b --- /dev/null +++ b/egs/rimes/v1/local/score_paragraph.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +min_lmwt=7 +max_lmwt=17 +word_ins_penalty=0.0,0.5,1.0 + +set -e +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +decode_dir=$1 +test_para=$decode_dir/scoring_kaldi/test_filt_para.txt + +cat $decode_dir/scoring_kaldi/test_filt.txt | \ + local/combine_line_txt_to_paragraph.py > $test_para + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for LMWT in $(seq $min_lmwt $max_lmwt); do + mkdir -p $decode_dir/para/penalty_$wip + cat $decode_dir/scoring_kaldi/penalty_$wip/$LMWT.txt | \ + local/combine_line_txt_to_paragraph.py > $decode_dir/para/penalty_$wip/$LMWT.txt + done +done + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for LMWT in $(seq $min_lmwt $max_lmwt); do + compute-wer --text --mode=present \ + ark:$test_para ark:$decode_dir/para/penalty_$wip/$LMWT.txt &> $decode_dir/para/wer_${LMWT}_${wip} || exit 1; + done +done + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in $(seq $min_lmwt $max_lmwt); do + # adding /dev/null to the command list below forces grep to output the filename + grep WER $decode_dir/para/wer_${lmwt}_${wip} /dev/null + done +done | utils/best_wer.sh >& $decode_dir/para/best_wer || exit 1 diff --git a/egs/rimes/v1/local/train_lm.sh b/egs/rimes/v1/local/train_lm.sh new file mode 100755 index 00000000000..51927b7a97e --- /dev/null +++ b/egs/rimes/v1/local/train_lm.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 +# +# This script trains a LM on the training transcriptions. +# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +dir=data/local/local_lm +order=6 +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +lm_dir=${dir}/data + + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. + else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. 
+# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + head -2000 data/train/text | cut -d " " -f 2- > ${dir}/data/text/dev.txt + + # use the training data as an additional data source. + # we can later fold the dev data into this. + tail -n +2000 data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + + if [ -d "data/local/text_data" ]; then + cat data/local/text_data/fr_text | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > ${dir}/data/text/corpus_text.txt + fi + + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt + cat ${dir}/data/text/{train,corpus_text}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='corpus_text=2 train=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz +fi diff --git a/egs/rimes/v1/local/wer_output_filter b/egs/rimes/v1/local/wer_output_filter new file mode 100755 index 00000000000..d9cf1f4072e --- /dev/null +++ b/egs/rimes/v1/local/wer_output_filter @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Hossein Hadian + +# Apache 2.0 +# This script converts a BPE-encoded text to normal text. It is used in scoring + +import sys, io +import string +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + +for line in infile: + words = line.strip().split() + uttid = words[0] + transcript = ''.join(words[1:]) + transcript = transcript.replace('|', ' ') + output.write(uttid + ' ' + transcript + '\n') diff --git a/egs/rimes/v1/path.sh b/egs/rimes/v1/path.sh new file mode 100755 index 00000000000..c7ebe7f2abf --- /dev/null +++ b/egs/rimes/v1/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=`pwd`/../../.. 
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LD_LIBRARY_PATH=$KALDI_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH +export LC_ALL=C diff --git a/egs/rimes/v1/run_end2end.sh b/egs/rimes/v1/run_end2end.sh new file mode 100755 index 00000000000..d3e3da2be13 --- /dev/null +++ b/egs/rimes/v1/run_end2end.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# Copyright 2018 Hossein Hadian +# Ashish Arora +# Jonathan Chang +# Apache 2.0 + +set -e +stage=0 +nj=50 +overwrite=false +rimes_database=/export/corpora5/handwriting_ocr/RIMES +train_set=train +use_extra_corpus_text=true +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. + +if [ $stage -le 0 ]; then + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + + echo "$0: Preparing data..." + local/prepare_data.sh --download-dir "$rimes_database" \ + --use_extra_corpus_text $use_extra_corpus_text + +fi + +mkdir -p data/{train,test,val}/data +if [ $stage -le 1 ]; then + echo "$(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + echo "$(date) Extracting features, creating feats.scp file" + for set in train test val; do + local/extract_features.sh --nj $nj --cmd "$cmd" data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; + done + utils/fix_data_dir.sh data/train +fi + +if [ $stage -le 3 ]; then + echo "$0: Preparing BPE..." + # getting non-silence phones. + cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/phones.txt + + cut -d' ' -f2- data/train/text > data/local/train_data.txt + cat data/local/phones.txt data/local/train_data.txt | \ + utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + + for set in test train val; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids + done +fi + +if [ $stage -le 4 ]; then + echo "$0: Preparing dictionary and lang..." + local/prepare_dict.sh + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 5 ]; then + echo "$0: Estimating a language model for decoding..." 
+  local/train_lm.sh
+  utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \
+                     data/local/dict/lexicon.txt data/lang
+fi
+
+if [ $stage -le 6 ]; then
+  echo "$0: Calling the flat-start chain recipe..."
+  local/chain/run_e2e_cnn.sh --train_set $train_set
+fi
+
+if [ $stage -le 7 ]; then
+  echo "$0: Aligning the training data using the e2e chain model..."
+  steps/nnet3/align.sh --nj 50 --cmd "$cmd" \
+    --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
+    data/$train_set data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train
+fi
+
+if [ $stage -le 8 ]; then
+  echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
+  local/chain/run_cnn_e2eali.sh --train_set $train_set
+fi
diff --git a/egs/rimes/v1/steps b/egs/rimes/v1/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/rimes/v1/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/rimes/v1/utils b/egs/rimes/v1/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/rimes/v1/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/sitw/v1/local/make_musan.py b/egs/sitw/v1/local/make_musan.py
index 74c434990fb..c4b5c9359b4 100755
--- a/egs/sitw/v1/local/make_musan.py
+++ b/egs/sitw/v1/local/make_musan.py
@@ -47,9 +47,9 @@ def prepare_music(root_dir, use_vocals):
             utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
             num_good_files += 1
         else:
-            print("Missing file", utt)
+            print("Missing file {}".format(utt))
             num_bad_files += 1
-    print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
+    print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
     return utt2spk_str, utt2wav_str

 def prepare_speech(root_dir):
@@ -73,9 +73,9 @@ def prepare_speech(root_dir):
             utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
             num_good_files += 1
         else:
-            print("Missing file", utt)
+            print("Missing file {}".format(utt))
             num_bad_files += 1
-    print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
+    print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
     return utt2spk_str, utt2wav_str

 def prepare_noise(root_dir):
@@ -99,9 +99,9 @@ def prepare_noise(root_dir):
             utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
             num_good_files += 1
         else:
-            print("Missing file", utt)
+            print("Missing file {}".format(utt))
             num_bad_files += 1
-    print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data")
+    print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
     return utt2spk_str, utt2wav_str

 def main():
diff --git a/egs/sprakbanken/s5/local/normalize_transcript.py b/egs/sprakbanken/s5/local/normalize_transcript.py
index 2374418bee7..21d70864f04 100755
--- a/egs/sprakbanken/s5/local/normalize_transcript.py
+++ b/egs/sprakbanken/s5/local/normalize_transcript.py
@@ -17,8 +17,8 @@
     "\t": " "
     }

-from_chars = ''.join(normdict.keys())
-to_chars = ''.join(normdict.values())
+from_chars = ''.join(list(normdict.keys()))
+to_chars = ''.join(list(normdict.values()))

 #t_table = maketrans(from_chars, to_chars)

diff --git a/egs/sprakbanken/s5/local/sprak2kaldi.py b/egs/sprakbanken/s5/local/sprak2kaldi.py
index f3abf1d9a38..5fa4baa1fa2 100755
---
a/egs/sprakbanken/s5/local/sprak2kaldi.py +++ b/egs/sprakbanken/s5/local/sprak2kaldi.py @@ -16,6 +16,7 @@ # limitations under the License. ''' +from __future__ import print_function import sys @@ -59,8 +60,8 @@ def create_parallel_file_list(session, sndlist, txtlist): if len(os.listdir(session.sessiondir)) != 0: # Check if there are files in the directory global n n += 1 - session.sessiondir = session.sessiondir + "_" + str(n) - session.speaker_id = session.speaker_id + "_" + str(n) + session.sessiondir = "{}_{}".format(session.sessiondir, n) + session.speaker_id = "{}_{}".format(session.speaker_id, n) os.mkdir(session.sessiondir) shadow = True else: diff --git a/egs/sprakbanken/s5/local/sprak2parallel.py b/egs/sprakbanken/s5/local/sprak2parallel.py index b5fe56fd60f..3dc82e30ac2 100755 --- a/egs/sprakbanken/s5/local/sprak2parallel.py +++ b/egs/sprakbanken/s5/local/sprak2parallel.py @@ -76,8 +76,8 @@ def make_speech_corpus(top, dest, srcfolder): session.sessiondir = os.path.join(dest, session.filestem) +"."+ session.speaker_id if os.path.exists(session.sessiondir): n += 1 - session.sessiondir = session.sessiondir+ "_" +str(n) - session.speaker_id+ "_" +str(n) + session.sessiondir = "{}_{}".format(session.sessiondir, n) + session.speaker_id = "{}_{}".format(session.speaker_id, n) os.mkdir(session.sessiondir) create_parallel_files(session) diff --git a/egs/sprakbanken/s5/local/sprakparser.py b/egs/sprakbanken/s5/local/sprakparser.py index 7bdf6ac94e3..1221cf0b023 100755 --- a/egs/sprakbanken/s5/local/sprakparser.py +++ b/egs/sprakbanken/s5/local/sprakparser.py @@ -22,11 +22,12 @@ ''' +from __future__ import print_function import codecs import os -class Session: +class Session(object): delimit = ">-<" @@ -151,7 +152,7 @@ def set_channel_vars(self, handle): pass def create_filename(self, uid, file_ending): - return self.filestem+ "." +self.speaker_id+ "." +str(uid)+ "." +file_ending + return "{}.{}.{}.{}".format(self.filestem, self.speaker_id, uid, file_ending) def wavpath(self, topfolder): prefix, suffix = topfolder.rsplit('/data/', 1) diff --git a/egs/sprakbanken/s5/local/writenumbers.py b/egs/sprakbanken/s5/local/writenumbers.py index df3235243d4..c419b3c7550 100755 --- a/egs/sprakbanken/s5/local/writenumbers.py +++ b/egs/sprakbanken/s5/local/writenumbers.py @@ -22,6 +22,7 @@ Changed to write output to file to prevent problems with shell ascii codec. ''' +from __future__ import print_function import sys import os @@ -215,7 +216,7 @@ def rmPvAnnotation(string): def normNumber(line, table): tokens = line.split() - keys = table.keys() + keys = list(table.keys()) for num, tok in enumerate(tokens): newtoks = splitNumeric(tok) if newtoks != False: diff --git a/egs/sprakbanken_swe/s5/local/normalize_transcript.py b/egs/sprakbanken_swe/s5/local/normalize_transcript.py index 90e45744e2a..150a9563aba 100755 --- a/egs/sprakbanken_swe/s5/local/normalize_transcript.py +++ b/egs/sprakbanken_swe/s5/local/normalize_transcript.py @@ -18,8 +18,8 @@ } #removes all the above signs -from_chars = ''.join(normdict.keys()) -to_chars = ''.join(normdict.values()) +from_chars = ''.join(list(normdict.keys())) +to_chars = ''.join(list(normdict.values())) t_table = str.maketrans(normdict) diff --git a/egs/sprakbanken_swe/s5/local/sprak2kaldi.py b/egs/sprakbanken_swe/s5/local/sprak2kaldi.py index cc67344c36e..8f723762e50 100755 --- a/egs/sprakbanken_swe/s5/local/sprak2kaldi.py +++ b/egs/sprakbanken_swe/s5/local/sprak2kaldi.py @@ -16,6 +16,7 @@ # limitations under the License. 
''' +from __future__ import print_function import sys @@ -59,8 +60,8 @@ def create_parallel_file_list(session, sndlist, txtlist): if len(os.listdir(session.sessiondir)) != 0: # Check if there are files in the directory global n n += 1 - session.sessiondir = session.sessiondir + "_" + str(n) - session.speaker_id = session.speaker_id + "_" + str(n) + session.sessiondir = "{}_{}".format(session.sessiondir, n) + session.speaker_id = "{}_{}".format(session.speaker_id, n) os.mkdir(session.sessiondir) shadow = True else: diff --git a/egs/sprakbanken_swe/s5/local/sprakparser.py b/egs/sprakbanken_swe/s5/local/sprakparser.py index 4775328b56b..0951f7f39e7 100755 --- a/egs/sprakbanken_swe/s5/local/sprakparser.py +++ b/egs/sprakbanken_swe/s5/local/sprakparser.py @@ -26,7 +26,7 @@ import codecs import os -class Session: +class Session(object): delimit = ">-<" @@ -151,7 +151,7 @@ def set_channel_vars(self, handle): pass def create_filename(self, uid, file_ending): - return self.filestem+ "." +self.speaker_id+ "." +str(uid)+ "." +file_ending + return "{}.{}.{}.{}".format(self.filestem, self.speaker_id, uid, file_ending) def wavpath(self, topfolder): prefix, suffix = topfolder.rsplit('/data/', 1) diff --git a/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py b/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py index 72a4572d9a0..e1a4fc534e0 100755 --- a/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py +++ b/egs/sre08/v1/sid/nnet3/xvector/allocate_egs.py @@ -65,6 +65,7 @@ # We're using python 3.x style print but want it to work in python 2.x. from __future__ import print_function +from __future__ import division import re, os, argparse, sys, math, warnings, random def get_args(): @@ -196,7 +197,7 @@ def deterministic_chunk_length(archive_id, num_archives, min_frames_per_chunk, m elif num_archives == 1: return int(max_frames_per_chunk); else: - return int(math.pow(float(max_frames_per_chunk) / + return int(math.pow(float(max_frames_per_chunk)/ min_frames_per_chunk, float(archive_id) / (num_archives-1)) * min_frames_per_chunk + 0.5) @@ -247,7 +248,7 @@ def main(): length = deterministic_chunk_length(archive_index, args.num_archives, args.min_frames_per_chunk, args.max_frames_per_chunk); print("{0} {1}".format(archive_index + 1, length), file=info_f) archive_chunk_lengths.append(length) - this_num_egs = int((args.frames_per_iter / length) + 1) + this_num_egs = int(float(args.frames_per_iter) / length + 1) this_egs = [ ] # A 2-tuple of the form (utt-id, start-frame) spkrs = args.num_repeats * list(spk2utt.keys()) random.shuffle(spkrs) diff --git a/egs/sre10/v1/local/prepare_for_eer.py b/egs/sre10/v1/local/prepare_for_eer.py index 59d2985e7c2..bb4e666f0ab 100755 --- a/egs/sre10/v1/local/prepare_for_eer.py +++ b/egs/sre10/v1/local/prepare_for_eer.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Copyright 2015 David Snyder # Apache 2.0. 
# @@ -12,4 +13,4 @@ spkrutt2target[spkr+utt]=target for line in scores: spkr, utt, score = line.strip().split() - print score, spkrutt2target[spkr+utt] + print("{} {}".format(score, spkrutt2target[spkr+utt])) diff --git a/egs/sre16/v1/local/make_musan.py b/egs/sre16/v1/local/make_musan.py index b3f6652ba40..7735bd28818 100755 --- a/egs/sre16/v1/local/make_musan.py +++ b/egs/sre16/v1/local/make_musan.py @@ -43,9 +43,9 @@ def prepare_music(root_dir, use_vocals): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -69,9 +69,9 @@ def prepare_speech(root_dir): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -95,9 +95,9 @@ def prepare_noise(root_dir): utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/svhn/v1/local/process_data.py b/egs/svhn/v1/local/process_data.py index f6ea85118f9..2a5bfc9a0d6 100755 --- a/egs/svhn/v1/local/process_data.py +++ b/egs/svhn/v1/local/process_data.py @@ -6,6 +6,7 @@ """ This script prepares the training and test data for SVHN. 
""" +from __future__ import division import argparse import os @@ -16,11 +17,11 @@ parser = argparse.ArgumentParser(description="""Converts train/test data of SVHN (Street View House Numbers) dataset to Kaldi feature format""") -parser.add_argument('matlab_file', type=str, +parser.add_argument('matlab_file', help='path to SVHN matlab data file (cropped version)') -parser.add_argument('dir', type=str, +parser.add_argument('dir', help='output dir') -parser.add_argument('--out-ark', type=str, +parser.add_argument('--out-ark', default='-', help='where to write output feature data') args = parser.parse_args() @@ -48,7 +49,7 @@ def write_kaldi_matrix(file_handle, matrix, key): if num_cols != len(matrix[row_index]): raise Exception("All the rows of a matrix are expected to " "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + file_handle.write(" ".join([str(x) for x in matrix[row_index]])) if row_index != num_rows - 1: file_handle.write("\n") file_handle.write(" ]\n") @@ -80,7 +81,7 @@ def zeropad(x, length): lbl = labels[i, 0] if lbl == 10: lbl = 0 - labels_fh.write(key + ' ' + str(lbl) + '\n') + labels_fh.write("{} {}\n".format(key, lbl)) img = data[i] write_kaldi_matrix(out_fh, img, key) img_id += 1 diff --git a/egs/swbd/s5c/local/map_acronyms_ctm.py b/egs/swbd/s5c/local/map_acronyms_ctm.py index bee488f73b0..7ae59d2a1d0 100755 --- a/egs/swbd/s5c/local/map_acronyms_ctm.py +++ b/egs/swbd/s5c/local/map_acronyms_ctm.py @@ -10,6 +10,7 @@ # en_4156 B 414.58 0.16 l # en_4156 B 414.74 0.17 a +from __future__ import division import argparse,re __author__ = 'Minhua Wu' diff --git a/egs/tedlium/s5/local/join_suffix.py b/egs/tedlium/s5/local/join_suffix.py index 64c62964331..c36b96a07f9 100755 --- a/egs/tedlium/s5/local/join_suffix.py +++ b/egs/tedlium/s5/local/join_suffix.py @@ -5,6 +5,7 @@ # Apache 2.0 +from __future__ import print_function import sys from codecs import open diff --git a/egs/tedlium/s5_r2/local/join_suffix.py b/egs/tedlium/s5_r2/local/join_suffix.py index 64c62964331..c36b96a07f9 100755 --- a/egs/tedlium/s5_r2/local/join_suffix.py +++ b/egs/tedlium/s5_r2/local/join_suffix.py @@ -5,6 +5,7 @@ # Apache 2.0 +from __future__ import print_function import sys from codecs import open diff --git a/egs/tedlium/s5_r2/local/run_learn_lex.sh b/egs/tedlium/s5_r2/local/run_learn_lex_bayesian.sh similarity index 98% rename from egs/tedlium/s5_r2/local/run_learn_lex.sh rename to egs/tedlium/s5_r2/local/run_learn_lex_bayesian.sh index a2a6f2e46b8..f1497bfe202 100755 --- a/egs/tedlium/s5_r2/local/run_learn_lex.sh +++ b/egs/tedlium/s5_r2/local/run_learn_lex_bayesian.sh @@ -2,7 +2,7 @@ # # This script demonstrates a lexicon learning recipe, which aims to imrove # the pronounciation of abbreviated words in the TED-LIUM lexicon. It assumes -# the model exp/tri3 already exists. Please see steps/dict/learn_lexicon.sh +# the model exp/tri3 already exists. Please see steps/dict/learn_lexicon_bayesian.sh # for explanation of the options. # # Copyright 2016 Xiaohui Zhang @@ -78,7 +78,7 @@ fi # Learn a lexicon based on the acoustic training data and the reference lexicon. 
 if [ $stage -le 1 ]; then
-  steps/dict/learn_lexicon.sh --lexicon-g2p "$data/lexicon_oov_g2p.txt" \
+  steps/dict/learn_lexicon_bayesian.sh --lexicon-g2p "$data/lexicon_oov_g2p.txt" \
     --min-prob $min_prob --variants-prob-mass $variants_prob_mass \
     --variants-prob-mass-ref $variants_prob_mass_ref \
     --prior-counts-tot $prior_counts_tot --prior-mean $prior_mean \
diff --git a/egs/tedlium/s5_r2/local/run_learn_lex_greedy.sh b/egs/tedlium/s5_r2/local/run_learn_lex_greedy.sh
new file mode 100755
index 00000000000..f69af3fe360
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/run_learn_lex_greedy.sh
@@ -0,0 +1,133 @@
+#! /bin/bash
+#
+# This script demonstrates a lexicon learning recipe, which aims to improve
+# the pronunciation of abbreviated words in the TED-LIUM lexicon. It assumes
+# the model exp/tri3 already exists. Please see steps/dict/learn_lexicon_greedy.sh
+# for explanation of the options.
+#
+# Copyright 2018 Xiaohui Zhang
+# Apache 2.0
+
+. ./cmd.sh
+. ./path.sh
+
+oov_symbol=""
+# The user may have a phonetisaurus-trained English g2p model ready.
+g2p_mdl_dir=
+# The dir which contains the reference lexicon (most probably hand-derived)
+# we want to expand/improve, and nonsilence_phones.txt, etc., which we need
+# for building new dict dirs.
+ref_dict=data/local/dict
+# acoustic training data we use to get alternative
+# pronunciations and collect acoustic evidence.
+data=data/train
+# the cut-off parameter used to select pronunciation candidates from phone
+# decoding. We remove pronunciations with probabilities less than this value
+# after normalizing the probs s.t. the max-prob is 1.0 for each word.
+min_prob=0.1
+# Refer to steps/dict/select_prons_greedy.sh for the detailed meaning of
+# alpha, beta and delta. Basically, the three dimensions of alpha
+# and beta correspond to three pronunciation sources: phonetic-
+# decoding, G2P and the reference lexicon, and the larger a value is,
+# the more aggressively we'll prune pronunciations from that source.
+# The valid range of each dim. is [0, 1] for alpha (0 means we never
+# prune prons from that source) and [0, 100] for beta.
+alpha="0.04,0.02,0"
+beta="30,5,0"
+# Floor value of the pronunciation posterior statistics.
+delta=0.00000001
+# This parameter determines how many pronunciations we keep for each word
+# after the first pass pruning. See steps/dict/internal/prune_pron_candidates.py
+# for details.
+vcr=16
+
+# Intermediate outputs of the lexicon learning stage will be put into dir
+dir=exp/tri3_lex_greedy_work
+nj=35
+decode_nj=30
+stage=0
+lexlearn_stage=0
+affix="learned_greedy"
+
+. utils/parse_options.sh # accept options
+
+# The reference vocab is the list of words for which we already have hand-derived pronunciations.
+ref_vocab=data/local/vocab.txt
+cat $ref_dict/lexicon.txt | awk '{print $1}' | sort | uniq > $ref_vocab || exit 1;
+
+# Get a G2P generated lexicon for oov words (w.r.t the reference lexicon)
+# in acoustic training data.
+if [ $stage -le 0 ]; then + if [ -z $g2p_mdl_dir ]; then + g2p_mdl_dir=exp/g2p_phonetisaurus + steps/dict/train_g2p_phonetisaurus.sh $ref_dict/lexicon.txt $g2p_mdl_dir || exit 1; + fi + awk '{for (n=2;n<=NF;n++) vocab[$n]=1;} END{for (w in vocab) printf "%s\n",w;}' \ + $data/text | sort -u > $data/train_vocab.txt || exit 1; + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $ref_vocab \ + $data/train_vocab.txt | sort > $data/oov_train.txt || exit 1; + steps/dict/apply_g2p_phonetisaurus.sh --nbest 5 $data/train_vocab.txt $g2p_mdl_dir \ + exp/g2p_phonetisaurus/lex_train || exit 1; +fi + +# Learn a lexicon based on the acoustic training data and the reference lexicon. +if [ $stage -le 1 ]; then + steps/dict/learn_lexicon_greedy.sh --lexiconp-g2p "exp/g2p_phonetisaurus/lex_train/lexicon.lex" \ + --alpha $alpha --beta $beta --delta $delta \ + --min-prob $min_prob --cmd "$train_cmd" \ + --variant-counts-ratio $vcr \ + --stage $lexlearn_stage --nj 60 --oov-symbol $oov_symbol --retrain-src-mdl false \ + $ref_dict $ref_vocab $data exp/tri3 data/lang data/local/dict_${affix}_nosp \ + $dir || exit 1; +fi + +# Add pronounciation probs to the learned lexicon. +if [ $stage -le 2 ]; then + utils/prepare_lang.sh --phone-symbol-table data/lang/phones.txt \ + data/local/dict_${affix}_nosp $oov_symbol data/local/lang_${affix}_nosp data/lang_${affix}_nosp || exit 1; + + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + $data data/lang_${affix}_nosp exp/tri2 exp/tri2_ali_${affix}_nosp || exit 1; + + steps/get_prons.sh --cmd "$train_cmd" data/train data/lang_${affix}_nosp exp/tri2_ali_${affix}_nosp || exit 1; + + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_${affix}_nosp exp/tri2_ali_${affix}_nosp/pron_counts_nowb.txt \ + exp/tri2_ali_${affix}_nosp/sil_counts_nowb.txt \ + exp/tri2_ali_${affix}_nosp/pron_bigram_counts_nowb.txt data/local/dict_${affix} || exit 1; + + utils/prepare_lang.sh --phone-symbol-table data/lang/phones.txt \ + data/local/dict_${affix} $oov_symbol data/local/lang_${affix} data/lang_${affix} || exit 1; +fi + +# Re-decode +if [ $stage -le 3 ]; then + ! cmp data/lang_nosp/words.txt data/lang_${affix}/words.txt &&\ + echo "$0: The vocab of the affix lexicon and the reference vocab may be incompatible." + cp data/lang_nosp/G.fst data/lang_${affix}/ + utils/mkgraph.sh data/lang_${affix} exp/tri3 exp/tri3/graph_${affix} || exit 1; + + for dset in dev test; do + ( steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + exp/tri3/graph_${affix} data/${dset} exp/tri3/decode_${affix}_${dset} || exit 1; + ) & + done +fi + +# RESULTS: +# Baseline: +# %WER 18.7 | 507 17783 | 83.9 11.4 4.7 2.6 18.7 92.3 | -0.006 | exp/tri3/decode_dev/score_17_0.0/ctm.filt.filt.sys +# %WER 17.6 | 1155 27500 | 84.7 11.6 3.7 2.4 17.6 87.2 | 0.013 | exp/tri3/decode_test/score_15_0.0/ctm.filt.filt.sys + +# Re-decoding with the learned lexicon: +# %WER 18.5 | 507 17783 | 84.3 11.2 4.5 2.8 18.5 92.3 | -0.007 | exp/tri3/decode_learned_greedy_dev/score_16_0.0/ctm.filt.filt.sys +# %WER 17.5 | 1155 27500 | 84.9 11.5 3.6 2.4 17.5 87.5 | 0.035 | exp/tri3/decode_learned_greedy_test/score_14_0.0/ctm.filt.filt.sys + +# To see the effect to neural-net results, one should re-train NN with the learned lexicon. 
+# Experiments have shown that, with the new lang dir, one should just re-run NN training +# starting from the supervision generation (steps/align_fmllr_lats.sh) stage, and should +# expect improved overall WERs and word recognition performance on words whose pronunciations +# were changed. + +exit +wait diff --git a/egs/tedlium/s5_r2_wsj/local/lm/merge_word_counts.py b/egs/tedlium/s5_r2_wsj/local/lm/merge_word_counts.py index 6338cbbf875..85e15d8dc07 100755 --- a/egs/tedlium/s5_r2_wsj/local/lm/merge_word_counts.py +++ b/egs/tedlium/s5_r2_wsj/local/lm/merge_word_counts.py @@ -7,6 +7,7 @@ A min-count argument is required to only write counts that are above the specified minimum count. """ +from __future__ import print_function import sys @@ -21,7 +22,7 @@ def main(): parts = line.strip().split() words[parts[1]] = words.get(parts[1], 0) + int(parts[0]) - for word, count in words.iteritems(): + for word, count in words.items(): if count >= int(sys.argv[1]): print ("{0} {1}".format(count, word)) diff --git a/egs/tedlium/s5_r3/local/join_suffix.py b/egs/tedlium/s5_r3/local/join_suffix.py index 64c62964331..c36b96a07f9 100755 --- a/egs/tedlium/s5_r3/local/join_suffix.py +++ b/egs/tedlium/s5_r3/local/join_suffix.py @@ -5,6 +5,7 @@ # Apache 2.0 +from __future__ import print_function import sys from codecs import open diff --git a/egs/thchs30/s5/local/dae/add-noise-mod.py b/egs/thchs30/s5/local/dae/add-noise-mod.py index 8327fc325ee..4486fd0fdc7 100755 --- a/egs/thchs30/s5/local/dae/add-noise-mod.py +++ b/egs/thchs30/s5/local/dae/add-noise-mod.py @@ -3,6 +3,7 @@ from __future__ import print_function +from __future__ import division import optparse import random import bisect @@ -26,7 +27,7 @@ def energy(mat): def mix(mat, noise, pos, scale): ret = [] l = len(noise) - for i in xrange(len(mat)): + for i in range(len(mat)): x = mat[i] d = int(x + scale * noise[pos]) #if d > 32767 or d < -32768: @@ -41,8 +42,8 @@ def mix(mat, noise, pos, scale): def dirichlet(params): samples = [random.gammavariate(x, 1) if x > 0 else 0. for x in params] - samples = [x / sum(samples) for x in samples] - for x in xrange(1, len(samples)): + samples = [(x / sum(samples)) for x in samples] + for x in range(1, len(samples)): samples[x] += samples[x - 1] return bisect.bisect_left(samples, random.random()) @@ -125,7 +126,7 @@ def main(): mat = wave_mat(wav) signal = energy(mat) logging.debug('signal energy: %f', signal) - noise = signal / (10 ** (noise_level / 10.)) + noise = signal / (10 ** (noise_level / 10)) logging.debug('noise energy: %f', noise) type = dirichlet(params) logging.debug('selected type: %d', type) diff --git a/egs/tunisian_msa/s5/local/buckwalter2unicode.py b/egs/tunisian_msa/s5/local/buckwalter2unicode.py index 94fec3225dd..f81841261ce 100755 --- a/egs/tunisian_msa/s5/local/buckwalter2unicode.py +++ b/egs/tunisian_msa/s5/local/buckwalter2unicode.py @@ -27,6 +27,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # +from __future__ import print_function import sys, getopt, codecs, os, re # Declare a dictionary with Buckwalter's ASCII symbols as the keys, and @@ -87,7 +88,7 @@ uni2buck = {} # Iterate through all the items in the buck2uni dict. -for (key, value) in buck2uni.iteritems(): +for (key, value) in buck2uni.items(): # The value from buck2uni becomes a key in uni2buck, and vice # versa for the keys. uni2buck[value] = key @@ -108,103 +109,103 @@ # A function to print to screen the usage details of this script. 
def usage(): - print "Usage:", sys.argv[0], "-i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR" - print " -r -e INPUT_ENCODING, -E OUTPUT ENCODING]" - print " ", sys.argv[0], "-l" - print " ", sys.argv[0], "-h" - print "" - print " -i INFILE, --input=INFILE:" - print " Path to text file to be transliterated to Unicode." - print " -o OUTFILE, --output=OUTFILE:" - print " Path of file to output the newly transliterated text." - print " -e ENC, --input-encoding=ENC:" - print " Specify the text encoding of the source file. Default: latin_1." - print " -E ENC, --output-encoding=ENC:" - print " Specify the text encoding of the target file. Default: utf_8." - print " -g CHARS, --ignore-lines=CHARS:" - print " Will not transliterate lines that start with any of the CHARS" - print " given. E.g., -g #; will not alter lines starting with # or ;." - print " (May need to be -g \#\; on some platforms. See README.txt.)" - print " -c RANGE, --columns=RANGE:" - print " If in columns, select columns to apply transliteration. Can be" - print " comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3." - print " -d CHAR, --delimiter=CHAR:" - print " Specify the delimiter that defines the column if using the -c" - print " option above. Default is ' ' (space)." - print " -r, --reverse:" - print " Reverses the transliteration, i.e., Arabic to Buckwalter." - print " When used, it will change the default input encoding to utf_8 and" - print " output encoding to latin_1" - print " -l, --list-encodings:" - print " Displays all supported file encodings." - print " -h, --help:" - print " Displays this page." - print "" + print("Usage: {} -i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR".format(sys.argv[0])) + print(" -r -e INPUT_ENCODING, -E OUTPUT ENCODING]") + print(" {} -l".format(sys.argv[0])) + print(" {} -h".format(sys.argv[0])) + print("") + print(" -i INFILE, --input=INFILE:") + print(" Path to text file to be transliterated to Unicode.") + print(" -o OUTFILE, --output=OUTFILE:") + print(" Path of file to output the newly transliterated text.") + print(" -e ENC, --input-encoding=ENC:") + print(" Specify the text encoding of the source file. Default: latin_1.") + print(" -E ENC, --output-encoding=ENC:") + print(" Specify the text encoding of the target file. Default: utf_8.") + print(" -g CHARS, --ignore-lines=CHARS:") + print(" Will not transliterate lines that start with any of the CHARS") + print(" given. E.g., -g #; will not alter lines starting with # or ;.") + print(" (May need to be -g \#\; on some platforms. See README.txt.)") + print(" -c RANGE, --columns=RANGE:") + print(" If in columns, select columns to apply transliteration. Can be") + print(" comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3.") + print(" -d CHAR, --delimiter=CHAR:") + print(" Specify the delimiter that defines the column if using the -c") + print(" option above. Default is ' ' (space).") + print(" -r, --reverse:") + print(" Reverses the transliteration, i.e., Arabic to Buckwalter.") + print(" When used, it will change the default input encoding to utf_8 and") + print(" output encoding to latin_1") + print(" -l, --list-encodings:") + print(" Displays all supported file encodings.") + print(" -h, --help:") + print(" Displays this page.") + print("") # A function to print to screen all the available encodings supported by # Python. 
def displayEncodings(): - print "Codec Aliases Languages" - print "ascii 646, us-ascii English" - print "cp037 IBM037, IBM039 English" - print "cp424 EBCDIC-CP-HE, IBM424 Hebrew" - print "cp437 437, IBM437 English" - print "cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe" - print "cp737 Greek" - print "cp775 IBM775 Baltic languages" - print "cp850 850, IBM850 Western Europe" - print "cp852 852, IBM852 Central and Eastern Europe" - print "cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian" - print "cp856 Hebrew" - print "cp857 857, IBM857 Turkish" - print "cp860 860, IBM860 Portuguese" - print "cp861 861, CP-IS, IBM861 Icelandic" - print "cp862 862, IBM862 Hebrew" - print "cp863 863, IBM863 Canadian" - print "cp864 IBM864 Arabic" - print "cp865 865, IBM865 Danish, Norwegian" - print "cp869 869, CP-GR, IBM869 Greek" - print "cp874 Thai" - print "cp875 Greek" - print "cp1006 Urdu" - print "cp1026 ibm1026 Turkish" - print "cp1140 ibm1140 Western Europe" - print "cp1250 windows-1250 Central and Eastern Europe" - print "cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian" - print "cp1252 windows-1252 Western Europe" - print "cp1253 windows-1253 Greek" - print "cp1254 windows-1254 Turkish" - print "cp1255 windows-1255 Hebrew" - print "cp1256 windows-1256 Arabic" - print "cp1257 windows-1257 Baltic languages" - print "cp1258 windows-1258 Vietnamese" - print "latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe" - print "iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe" - print "iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese" - print "iso8859_4 iso-8859-4, latin4, L4 Baltic languagues" - print "iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian" - print "iso8859_6 iso-8859-6, arabic Arabic" - print "iso8859_7 iso-8859-7, greek, greek8 Greek" - print "iso8859_8 iso-8859-8, hebrew Hebrew" - print "iso8859_9 iso-8859-9, latin5, L5 Turkish" - print "iso8859_10 iso-8859-10, latin6, L6 Nordic languages" - print "iso8859_13 iso-8859-13 Baltic languages" - print "iso8859_14 iso-8859-14, latin8, L8 Celtic languages" - print "iso8859_15 iso-8859-15 Western Europe" - print "koi8_r Russian" - print "koi8_u Ukrainian" - print "mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian" - print "mac_greek macgreek Greek" - print "mac_iceland maciceland Icelandic" - print "mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe" - print "mac_roman macroman Western Europe" - print "mac_turkish macturkish Turkish" - print "utf_16 U16, utf16 all languages" - print "utf_16_be UTF-16BE all languages (BMP only)" - print "utf_16_le UTF-16LE all languages (BMP only)" - print "utf_7 U7 all languages" - print "utf_8 U8, UTF, utf8 all languages" + print("Codec Aliases Languages") + print("ascii 646, us-ascii English") + print("cp037 IBM037, IBM039 English") + print("cp424 EBCDIC-CP-HE, IBM424 Hebrew") + print("cp437 437, IBM437 English") + print("cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe") + print("cp737 Greek") + print("cp775 IBM775 Baltic languages") + print("cp850 850, IBM850 Western Europe") + print("cp852 852, IBM852 Central and Eastern Europe") + print("cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian") + print("cp856 Hebrew") + print("cp857 857, IBM857 Turkish") + print("cp860 860, IBM860 Portuguese") + print("cp861 861, CP-IS, IBM861 Icelandic") + print("cp862 862, IBM862 Hebrew") + print("cp863 863, IBM863 Canadian") + 
print("cp864 IBM864 Arabic") + print("cp865 865, IBM865 Danish, Norwegian") + print("cp869 869, CP-GR, IBM869 Greek") + print("cp874 Thai") + print("cp875 Greek") + print("cp1006 Urdu") + print("cp1026 ibm1026 Turkish") + print("cp1140 ibm1140 Western Europe") + print("cp1250 windows-1250 Central and Eastern Europe") + print("cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian") + print("cp1252 windows-1252 Western Europe") + print("cp1253 windows-1253 Greek") + print("cp1254 windows-1254 Turkish") + print("cp1255 windows-1255 Hebrew") + print("cp1256 windows-1256 Arabic") + print("cp1257 windows-1257 Baltic languages") + print("cp1258 windows-1258 Vietnamese") + print("latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe") + print("iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe") + print("iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese") + print("iso8859_4 iso-8859-4, latin4, L4 Baltic languagues") + print("iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian") + print("iso8859_6 iso-8859-6, arabic Arabic") + print("iso8859_7 iso-8859-7, greek, greek8 Greek") + print("iso8859_8 iso-8859-8, hebrew Hebrew") + print("iso8859_9 iso-8859-9, latin5, L5 Turkish") + print("iso8859_10 iso-8859-10, latin6, L6 Nordic languages") + print("iso8859_13 iso-8859-13 Baltic languages") + print("iso8859_14 iso-8859-14, latin8, L8 Celtic languages") + print("iso8859_15 iso-8859-15 Western Europe") + print("koi8_r Russian") + print("koi8_u Ukrainian") + print("mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian") + print("mac_greek macgreek Greek") + print("mac_iceland maciceland Icelandic") + print("mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe") + print("mac_roman macroman Western Europe") + print("mac_turkish macturkish Turkish") + print("utf_16 U16, utf16 all languages") + print("utf_16_be UTF-16BE all languages (BMP only)") + print("utf_16_le UTF-16LE all languages (BMP only)") + print("utf_7 U7 all languages") + print("utf_8 U8, UTF, utf8 all languages") def parseIgnoreString(string): @@ -254,13 +255,13 @@ def parseIgnoreString(string): delimiter = delimiter.replace("\\t", "\t") # Do some error checking if len(delimiter) > 1: - print >>sys.stderr, "Delimeter should only be a single character. Using first character" + delimiter[0] + print("Delimeter should only be a single character. Using first character" + delimiter[0], file=sys.stderr) delimiter = delimiter[0] if buck2uni.get(delimiter): - print >> sys.stderr, "Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set." - print >> sys.stderr, "This will obviously cause much confusion as a delimiter!" - print >> sys.stderr, "Please try again. Aborting..." + print("Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set.", file=sys.stderr) + print("This will obviously cause much confusion as a delimiter!", file=sys.stderr) + print("Please try again. Aborting...", file=sys.stderr) sys.exit(1) # If no delimiter was set then, set the default to " " (space) @@ -303,16 +304,16 @@ def parseIgnoreString(string): # specified output encoding. outFile = codecs.open(outFilename, "w", outEnc) - except IOError, msg: + except IOError as msg: # A problem occurred when trying to open this file. Report to # user... - print msg + print(msg) sys.exit(1) # Script can not work without somewhere to store the transliteration. # Exit. else: - print "Must specify a file to use store the output! 
Aborting..." + print("Must specify a file to use store the output! Aborting...") sys.exit(1) # Providing a file for input was specified... @@ -322,15 +323,15 @@ def parseIgnoreString(string): # specified input encoding. inFile = codecs.open(inFilename, "r", inEnc) - except IOError, msg: + except IOError as msg: # A problem occurred when trying to open this file. Report to # user... - print msg + print(msg) sys.exit(1) # This script requires a file to read from. Exit. else: - print "Must specify a file to use as input! Aborting..." + print("Must specify a file to use as input! Aborting...") sys.exit(1) def getColsFromRange(cRange): @@ -344,7 +345,7 @@ def getColsFromRange(cRange): # If it contains a hyphen (e.g., 1-3) if hyphenSearch.search(i): [start, end] = i.split("-") - columns = columns + range(int(start)-1,int(end)) + columns = columns + list(range(int(start)-1,int(end))) else: columns.append(int(i)-1) @@ -441,9 +442,9 @@ def transliterateString(inString): currentLineNumber = currentLineNumber + 1 - except UnicodeError, msg: + except UnicodeError as msg: # A problem when writing occurred. Report to user... - print msg + print(msg) sys.exit(1) # All done! Better close the files used before terminating... diff --git a/egs/uw3/v1/local/make_features.py b/egs/uw3/v1/local/make_features.py index dd0a30a19d7..e0211963e39 100755 --- a/egs/uw3/v1/local/make_features.py +++ b/egs/uw3/v1/local/make_features.py @@ -24,8 +24,8 @@ parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") -parser.add_argument('dir', type=str, help='data directory (should contain images.scp)') -parser.add_argument('--out-ark', type=str, default='-', help='where to write the output feature file.') +parser.add_argument('dir', help='data directory (should contain images.scp)') +parser.add_argument('--out-ark', default='-', help='where to write the output feature file.') parser.add_argument('--feat-dim', type=int, default=40, help='size to scale the height of all images (i.e. 
the dimension of the resulting features)') parser.add_argument('--pad', type=bool, default=False, help='pad the left and right of the images with 10 white pixels.') @@ -43,7 +43,7 @@ def write_kaldi_matrix(file_handle, matrix, key): if num_cols != len(matrix[row_index]): raise Exception("All the rows of a matrix are expected to " "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + file_handle.write(" ".join([str(x) for x in matrix[row_index]])) if row_index != num_rows - 1: file_handle.write("\n") file_handle.write(" ]\n") diff --git a/egs/uw3/v1/local/process_data.py b/egs/uw3/v1/local/process_data.py index f5b37b04c2f..3643c0aca89 100755 --- a/egs/uw3/v1/local/process_data.py +++ b/egs/uw3/v1/local/process_data.py @@ -14,8 +14,8 @@ import random parser = argparse.ArgumentParser(description="""Creates data/train and data/test.""") -parser.add_argument('database_path', type=str, help='path to downloaded (and extracted) UW3 corpus') -parser.add_argument('out_dir', type=str, default='data', +parser.add_argument('database_path', help='path to downloaded (and extracted) UW3 corpus') +parser.add_argument('out_dir', default='data', help='where to create the train and test data directories') args = parser.parse_args() @@ -53,9 +53,9 @@ coin = random.randint(0, 20) if coin >= 1: train_text_fh.write(utt_id + ' ' + text + '\n') - train_utt2spk_fh.write(utt_id + ' ' + str(page_count) + '\n') - train_image_fh.write(utt_id + ' ' + image_path + '\n') + train_utt2spk_fh.write("{} {}\n".format(utt_id, page_count)) + train_image_fh.write("{} {}\n".format(utt_id, image_path)) elif coin < 1: - test_text_fh.write(utt_id + ' ' + text + '\n') - test_utt2spk_fh.write(utt_id + ' ' + str(page_count) + '\n') - test_image_fh.write(utt_id + ' ' + image_path + '\n') + test_text_fh.write("{} {}\n".format(utt_id, text)) + test_utt2spk_fh.write("{} {}\n".format(utt_id, page_count)) + test_image_fh.write("{} {}\n".format(utt_id, image_path)) diff --git a/egs/voxceleb/v1/local/make_musan.py b/egs/voxceleb/v1/local/make_musan.py index 74c434990fb..565bfce0cc9 100755 --- a/egs/voxceleb/v1/local/make_musan.py +++ b/egs/voxceleb/v1/local/make_musan.py @@ -47,9 +47,9 @@ def prepare_music(root_dir, use_vocals): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In music directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -73,9 +73,9 @@ def prepare_speech(root_dir): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -99,9 +99,9 @@ def prepare_noise(root_dir): utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: - print("Missing file", utt) + print("Missing file {}".format(utt)) num_bad_files += 1 - print("In noise directory, processed", num_good_files, "files;", num_bad_files, "had missing wav data") + print("In
noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/voxceleb/v1/local/prepare_for_eer.py b/egs/voxceleb/v1/local/prepare_for_eer.py index 6bfa04e011b..2f569b70bc5 100755 --- a/egs/voxceleb/v1/local/prepare_for_eer.py +++ b/egs/voxceleb/v1/local/prepare_for_eer.py @@ -16,4 +16,4 @@ spkrutt2target[spkr+utt]=target for line in scores: spkr, utt, score = line.strip().split() - print(score, spkrutt2target[spkr+utt]) + print("{} {}".format(score, spkrutt2target[spkr+utt])) diff --git a/egs/voxceleb/v2/run.sh b/egs/voxceleb/v2/run.sh index e57799cee27..f8c50d7f9df 100755 --- a/egs/voxceleb/v2/run.sh +++ b/egs/voxceleb/v2/run.sh @@ -27,7 +27,7 @@ stage=0 if [ $stage -le 0 ]; then local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test - # This script reates data/voxceleb1_test and data/voxceleb1_train. + # This script creates data/voxceleb1_test and data/voxceleb1_train. # Our evaluation set is the test portion of VoxCeleb1. local/make_voxceleb1.pl $voxceleb1_root data # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. diff --git a/egs/voxforge/gst_demo/run-live.py b/egs/voxforge/gst_demo/run-live.py index 725a306c42c..7876e5f2046 100755 --- a/egs/voxforge/gst_demo/run-live.py +++ b/egs/voxforge/gst_demo/run-live.py @@ -6,6 +6,7 @@ # # Apache 2.0 +from __future__ import print_function import sys import os import gi @@ -46,7 +47,7 @@ def init_gst(self): """Initialize the speech components""" self.pulsesrc = Gst.ElementFactory.make("pulsesrc", "pulsesrc") if self.pulsesrc == None: - print >> sys.stderr, "Error loading pulsesrc GST plugin. You probably need the gstreamer1.0-pulseaudio package" + print("Error loading pulsesrc GST plugin. You probably need the gstreamer1.0-pulseaudio package", file=sys.stderr) sys.exit() self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert") self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample") @@ -56,7 +57,7 @@ def init_gst(self): if self.asr: model_dir = "online-data/models/tri2b_mmi/" if not os.path.isdir(model_dir): - print >> sys.stderr, "Model (%s) not downloaded. Run run-simulated.sh first" % model_dir + print("Model (%s) not downloaded. Run run-simulated.sh first" % model_dir, file=sys.stderr) sys.exit(1) self.asr.set_property("fst", model_dir + "HCLG.fst") self.asr.set_property("lda-mat", model_dir + "matrix") @@ -67,12 +68,12 @@ def init_gst(self): self.asr.set_property("beam", 12.0) self.asr.set_property("acoustic-scale", 0.0769) else: - print >> sys.stderr, "Couldn't create the onlinegmmfasterdecoder element. " + print("Couldn't create the onlinegmmfasterdecoder element. ", file=sys.stderr) if "GST_PLUGIN_PATH" in os.environ: - print >> sys.stderr, "Have you compiled the Kaldi GStreamer plugin?" 
+ print("Have you compiled the Kaldi GStreamer plugin?", file=sys.stderr) else: - print >> sys.stderr, "You probably need to set the GST_PLUGIN_PATH envoronment variable" - print >> sys.stderr, "Try running: GST_PLUGIN_PATH=../../../src/gst-plugin %s" % sys.argv[0] + print("You probably need to set the GST_PLUGIN_PATH envoronment variable", file=sys.stderr) + print("Try running: GST_PLUGIN_PATH=../../../src/gst-plugin %s" % sys.argv[0], file=sys.stderr) sys.exit(); # initially silence the decoder @@ -111,10 +112,10 @@ def button_clicked(self, button): if __name__ == '__main__': app = DemoApp() - print ''' + print(''' The (bigram) language model used to build the decoding graph was estimated on an audio book's text. The text in question is King Solomon's Mines" (http://www.gutenberg.org/ebooks/2166). - You may want to read some sentences from this book first ...''' + You may want to read some sentences from this book first ...''') Gtk.main() diff --git a/egs/voxforge/s5/local/make_trans.py b/egs/voxforge/s5/local/make_trans.py index 1b4f5c4136a..612755c8be4 100755 --- a/egs/voxforge/s5/local/make_trans.py +++ b/egs/voxforge/s5/local/make_trans.py @@ -12,11 +12,12 @@ if this is the case produces a transcript line for each file in the format: prefix_a0405 IT SEEMED THE ORDAINED ORDER OF THINGS THAT DOGS SHOULD WORK """ +from __future__ import print_function import sys def err(msg): - print >> sys.stderr, msg + print(msg, file=sys.stderr) if len(sys.argv) < 3: err("Usage: %s ... " % sys.argv[0]) @@ -46,5 +47,5 @@ def err(msg): if not uid in utt2trans: err("No transcript found for %s_%s" % (id_prefix, uid)) continue - print "%s-%s %s" % (id_prefix, uid, utt2trans[uid]) + print("%s-%s %s" % (id_prefix, uid, utt2trans[uid])) diff --git a/egs/vystadial_cz/online_demo/build_reference.py b/egs/vystadial_cz/online_demo/build_reference.py index 1be78391d2f..aea12a2c8bc 100755 --- a/egs/vystadial_cz/online_demo/build_reference.py +++ b/egs/vystadial_cz/online_demo/build_reference.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 from __future__ import unicode_literals +from __future__ import print_function import glob import sys @@ -8,7 +9,7 @@ import codecs def build_reference(wav_scp, ref_path): - print wav_scp, ref_path + print(wav_scp, ref_path) with codecs.open(ref_path, 'w', 'utf-8') as w: with codecs.open(wav_scp, 'r', 'utf-8') as scp: for line in scp: @@ -31,8 +32,8 @@ def build_reference(wav_scp, ref_path): usage_args = {'exec': sys.argv[0]} if len(sys.argv) != 3: - print >> sys.stderr, "Wrong number of arguments" - print >> sys.stderr, usage % {'exec': sys.argv[0]} + print("Wrong number of arguments", file=sys.stderr) + print(usage % {'exec': sys.argv[0]}, file=sys.stderr) sys.exit(1) if sys.argv[1].endswith('scp'): @@ -41,12 +42,12 @@ def build_reference(wav_scp, ref_path): scps = glob.glob(os.path.join(sys.argv[1], '*.scp')) target_dir = sys.argv[2] if not len(scps): - print >> sys.stderr, "No '*.scp' files found" - print >> sys.stderr, usage % {'exec': sys.argv[0]} + print("No '*.scp' files found", file=sys.stderr) + print(usage % {'exec': sys.argv[0]}, file=sys.stderr) sys.exit(1) if not os.path.isdir(target_dir): - print >> sys.stderr, "No '*.scp' files found" - print >> sys.stderr, usage % {'exec': sys.argv[0]} + print("No '*.scp' files found", file=sys.stderr) + print(usage % {'exec': sys.argv[0]}, file=sys.stderr) sys.exit(1) refers = [os.path.join(target_dir, os.path.basename(scp) + '.tra') for scp in scps] diff --git a/egs/vystadial_cz/online_demo/live-demo.py 
b/egs/vystadial_cz/online_demo/live-demo.py index 6b41c12c739..320a930735f 100755 --- a/egs/vystadial_cz/online_demo/live-demo.py +++ b/egs/vystadial_cz/online_demo/live-demo.py @@ -15,6 +15,7 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. # from __future__ import unicode_literals +from __future__ import print_function import pyaudio from kaldi.decoders import PyOnlineLatgenRecogniser @@ -29,7 +30,7 @@ CHANNELS, RATE, FORMAT = 1, 16000, pyaudio.paInt16 -class LiveDemo: +class LiveDemo(object): def __init__(self, audio_batch_size, wst, dec_args): self.batch_size = audio_batch_size @@ -127,7 +128,7 @@ def save_wav(self): if __name__ == '__main__': audio_batch_size, wst_path = int(sys.argv[1]), sys.argv[2] argv = sys.argv[3:] - print >> sys.stderr, 'Python args: %s' % str(sys.argv) + print('Python args: %s' % str(sys.argv), file=sys.stderr) wst = wst2dict(wst_path) demo = LiveDemo(audio_batch_size, wst, argv) diff --git a/egs/vystadial_cz/online_demo/pykaldi-online-latgen-recogniser.py b/egs/vystadial_cz/online_demo/pykaldi-online-latgen-recogniser.py index 02a0400921c..0008a4c01f1 100755 --- a/egs/vystadial_cz/online_demo/pykaldi-online-latgen-recogniser.py +++ b/egs/vystadial_cz/online_demo/pykaldi-online-latgen-recogniser.py @@ -14,6 +14,8 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. # from __future__ import unicode_literals +from __future__ import division +from __future__ import print_function from kaldi.utils import load_wav, wst2dict, lattice_to_nbest from kaldi.decoders import PyOnlineLatgenRecogniser @@ -31,14 +33,14 @@ def write_decoded(f, wav_name, word_ids, wst): if wst is not None: decoded = [wst[w] for w in best_path] else: - decoded = [unicode(w) for w in best_path] + decoded = [str(w) for w in best_path] line = u' '.join([wav_name] + decoded + ['\n']) if DEBUG: - print '%s best path %s' % (wav_name, decoded.encode('UTF-8')) + print('%s best path %s' % (wav_name, decoded.encode('UTF-8'))) for i, s in enumerate(word_ids): if i > 0: break - print 'best path %d: %s' % (i, str(s)) + print('best path %d: %s' % (i, str(s))) f.write(line.encode('UTF-8')) @@ -55,11 +57,11 @@ def decode(d, pcm): while dec_t > 0: decoded_frames += dec_t dec_t = d.decode(max_frames=10) - print "forward decode: %s secs" % str(time.time() - start) + print("forward decode: %s secs" % str(time.time() - start)) start = time.time() d.prune_final() lik, lat = d.get_lattice() - print "backward decode: %s secs" % str(time.time() - start) + print("backward decode: %s secs" % str(time.time() - start)) d.reset(keep_buffer_data=False) return (lat, lik, decoded_frames) @@ -72,7 +74,7 @@ def decode_wrap(argv, audio_batch_size, wav_paths, for wav_name, wav_path in wav_paths: sw, sr = 2, 16000 # 16-bit audio so 1 sample_width = 2 chars pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr) - print '%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr) + print('%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr)) lat, lik, decoded_frames = decode(d, pcm) lat.isyms = lat.osyms = fst.read_symbols_text(wst_path) if DEBUG: @@ -80,8 +82,8 @@ def decode_wrap(argv, audio_batch_size, wav_paths, f.write(lat._repr_svg_()) lat.write('%s_pykaldi.fst' % wav_name) - print "Log-likelihood per frame for utterance %s is %f over %d frames" % ( - wav_name, (lik / decoded_frames), decoded_frames) + print("Log-likelihood per frame for utterance %s is %f over %d frames" % ( + wav_name, 
(lik / decoded_frames), decoded_frames)) word_ids = lattice_to_nbest(lat, n=10) write_decoded(file_output, wav_name, word_ids, wst) @@ -90,7 +92,7 @@ def decode_wrap(argv, audio_batch_size, wav_paths, audio_scp, audio_batch_size = sys.argv[1], int(sys.argv[2]) dec_hypo, wst_path = sys.argv[3], sys.argv[4] argv = sys.argv[5:] - print >> sys.stderr, 'Python args: %s' % str(sys.argv) + print('Python args: %s' % str(sys.argv), file=sys.stderr) # open audio_scp, decode and write to dec_hypo file with open(audio_scp, 'rb') as r: diff --git a/egs/vystadial_cz/s5/local/results.py b/egs/vystadial_cz/s5/local/results.py index a7c19af214c..f37109d5fcb 100755 --- a/egs/vystadial_cz/s5/local/results.py +++ b/egs/vystadial_cz/s5/local/results.py @@ -14,6 +14,8 @@ # MERCHANTABLITY OR NON-INFRINGEMENT. # See the Apache 2 License for the specific language governing permissions and # limitations under the License. # +from __future__ import division +from __future__ import print_function import argparse import glob import sys @@ -29,8 +31,8 @@ def extract_stat(wer_file): ser = float(s[2].split()[1]) except Exception as e: - print sys.stderr, 'Error parsing file %s' % wer_file - print sys.stderr, str(e) + print(sys.stderr, 'Error parsing file %s' % wer_file) + print(sys.stderr, str(e)) return wer, ser @@ -47,8 +49,8 @@ def extractResults(path): wer, ser = extract_stat(wf) table.append((exp, dataset, lm, lm_w, wer, ser)) except Exception as e: - print >> sys.stderr, 'failed to parse %s' % wf - print >> sys.stderr, str(e) + print('failed to parse %s' % wf, file=sys.stderr) + print(str(e), file=sys.stderr) return table @@ -105,7 +107,7 @@ def Table2LatexTable(table): def createSmallTable(r): d = [] - for k, v in r.iteritems(): + for k, v in r.items(): w, s, r = v if w == []: minw = None @@ -115,7 +117,7 @@ def createSmallTable(r): mins = None else: mins = min(s) # returns tuple if s is list of tuples - mean_r = sum(r) / float(len(r)) + mean_r = float(sum(r)) / len(r) d.append([k, mean_r, minw, mins]) t = Table(d, ['exp', 'RT coef', 'WER', 'SER']) return t @@ -167,7 +169,7 @@ def createSmallTable(r): # remove duplicates: duplicates if equal mimimum wer in dev set min_dev_un = [(e, lm, lmw) for ((e, lm), lmw) in - dict([((e, lm), lmw) for e, lm, lmw in min_dev]).items()] + list(dict([((e, lm), lmw) for e, lm, lmw in min_dev]).items())] # sort according LM -> sort results according experiment & LMs min_dev_un.sort(key=lambda x: (x[1], x[0])) @@ -182,6 +184,6 @@ def createSmallTable(r): d.append(x[0]) t = Table(data=d, colnames=['exp', 'set', 'LM', 'LMW', 'WER', 'SER']) - print str(t) + print(str(t)) if args.latex: - print Table2LatexTable(t) + print(Table2LatexTable(t)) diff --git a/egs/vystadial_en/s5/local/results.py b/egs/vystadial_en/s5/local/results.py index a7c19af214c..f37109d5fcb 100755 --- a/egs/vystadial_en/s5/local/results.py +++ b/egs/vystadial_en/s5/local/results.py @@ -14,6 +14,8 @@ # MERCHANTABLITY OR NON-INFRINGEMENT. # See the Apache 2 License for the specific language governing permissions and # limitations under the License.
# +from __future__ import division +from __future__ import print_function import argparse import glob import sys @@ -29,8 +31,8 @@ def extract_stat(wer_file): ser = float(s[2].split()[1]) except Exception as e: - print sys.stderr, 'Error parsing file %s' % wer_file - print sys.stderr, str(e) + print(sys.stderr, 'Error parsing file %s' % wer_file) + print(sys.stderr, str(e)) return wer, ser @@ -47,8 +49,8 @@ def extractResults(path): wer, ser = extract_stat(wf) table.append((exp, dataset, lm, lm_w, wer, ser)) except Exception as e: - print >> sys.stderr, 'failed to parse %s' % wf - print >> sys.stderr, str(e) + print('failed to parse %s' % wf, file=sys.stderr) + print(str(e), file=sys.stderr) return table @@ -105,7 +107,7 @@ def Table2LatexTable(table): def createSmallTable(r): d = [] - for k, v in r.iteritems(): + for k, v in r.items(): w, s, r = v if w == []: minw = None @@ -115,7 +117,7 @@ def createSmallTable(r): mins = None else: mins = min(s) # returns tuple if s is list of tuples - mean_r = sum(r) / float(len(r)) + mean_r = float(sum(r)) / len(r) d.append([k, mean_r, minw, mins]) t = Table(d, ['exp', 'RT coef', 'WER', 'SER']) return t @@ -167,7 +169,7 @@ def createSmallTable(r): # remove duplicates: duplicates if equal mimimum wer in dev set min_dev_un = [(e, lm, lmw) for ((e, lm), lmw) in - dict([((e, lm), lmw) for e, lm, lmw in min_dev]).items()] + list(dict([((e, lm), lmw) for e, lm, lmw in min_dev]).items())] # sort according LM -> sort results according experiment & LMs min_dev_un.sort(key=lambda x: (x[1], x[0])) @@ -182,6 +184,6 @@ def createSmallTable(r): d.append(x[0]) t = Table(data=d, colnames=['exp', 'set', 'LM', 'LMW', 'WER', 'SER']) - print str(t) + print(str(t)) if args.latex: - print Table2LatexTable(t) + print(Table2LatexTable(t)) diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh index 1724c057e12..526059b7b90 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh @@ -220,6 +220,7 @@ if [ $stage -le 16 ]; then --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=2000" \ --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=10 \ diff --git a/egs/wsj/s5/steps/cleanup/combine_short_segments.py b/egs/wsj/s5/steps/cleanup/combine_short_segments.py index 1d14bd2a57f..099b92882a9 100755 --- a/egs/wsj/s5/steps/cleanup/combine_short_segments.py +++ b/egs/wsj/s5/steps/cleanup/combine_short_segments.py @@ -284,7 +284,7 @@ def CombineSegments(input_dir, output_dir, minimum_duration): assert(cur_utt_dur == combined_duration) # now modify the utts list - combined_indices = range(left_index, right_index + 1) + combined_indices = list(range(left_index, right_index + 1)) # start popping from the largest index so that the lower # indexes are valid for i in combined_indices[::-1]: diff --git a/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py b/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py index 414875f9013..a33ba85d9fa 100755 --- a/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py +++ b/egs/wsj/s5/steps/cleanup/internal/get_pron_stats.py @@ -4,6 +4,7 @@ # Apache 2.0. 
from __future__ import print_function +from __future__ import division import argparse import sys import warnings @@ -211,7 +212,7 @@ def GetStatsFromCtmProns(silphones, optional_silence, non_scored_words, ctm_pron return stats def WriteStats(stats, file_handle): - for word_pron, count in stats.iteritems(): + for word_pron, count in stats.items(): print('{0} {1} {2}'.format(count, word_pron[0], word_pron[1]), file=file_handle) file_handle.close() diff --git a/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py b/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py index f37fa866b0f..e41a67705e9 100755 --- a/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py +++ b/egs/wsj/s5/steps/cleanup/internal/make_one_biased_lm.py @@ -4,6 +4,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division import sys import argparse import math @@ -47,7 +48,7 @@ -class NgramCounts: +class NgramCounts(object): ## A note on data-structure. ## Firstly, all words are represented as integers. ## We store n-gram counts as an array, indexed by (history-length == n-gram order minus one) @@ -139,7 +140,7 @@ def GetHistToTotalCount(self): # LM-states that would back off to 'this' lm-state, in the total. def CompletelyDiscountLowCountStates(self, min_count): hist_to_total_count = self.GetHistToTotalCount() - for n in reversed(range(2, self.ngram_order)): + for n in reversed(list(range(2, self.ngram_order))): this_order_counts = self.counts[n] for hist in this_order_counts.keys(): if hist_to_total_count[hist] < min_count: @@ -156,7 +157,7 @@ def CompletelyDiscountLowCountStates(self, min_count): # with interpolation). def ApplyBackoff(self, D): assert D > 0.0 and D < 1.0 - for n in reversed(range(1, self.ngram_order)): + for n in reversed(list(range(1, self.ngram_order))): this_order_counts = self.counts[n] for hist, word_to_count in this_order_counts.items(): backoff_hist = hist[1:] @@ -182,7 +183,7 @@ def Print(self, info_string): for this_order_counts in self.counts: for hist, word_to_count in this_order_counts.items(): this_total_count = sum(word_to_count.values()) - print(str(hist) + ': total={0} '.format(this_total_count), + print('{0}: total={1} '.format(hist, this_total_count), end='', file=sys.stderr) print(' '.join(['{0} -> {1} '.format(word, count) for word, count in word_to_count.items() ]), @@ -242,10 +243,10 @@ def GetHistToStateMap(self): def GetProb(self, hist, word, total_count_map): total_count = total_count_map[hist] word_to_count = self.counts[len(hist)][hist] - prob = word_to_count[word] / total_count + prob = float(word_to_count[word]) / total_count if len(hist) > 0 and word != self.backoff_symbol: prob_in_backoff = self.GetProb(hist[1:], word, total_count_map) - backoff_prob = word_to_count[self.backoff_symbol] / total_count + backoff_prob = float(word_to_count[self.backoff_symbol]) / total_count prob += backoff_prob * prob_in_backoff return prob @@ -262,7 +263,7 @@ def PrintAsFst(self, word_disambig_symbol): hist_to_state = self.GetHistToStateMap() total_count_map = self.GetTotalCountMap() - for n in [ 1, 0 ] + range(2, self.ngram_order): + for n in [ 1, 0 ] + list(range(2, self.ngram_order)): this_order_counts = self.counts[n] # For order 1, make sure the keys are sorted. 
keys = this_order_counts.keys() if n != 1 else sorted(this_order_counts.keys()) diff --git a/egs/wsj/s5/steps/cleanup/internal/resolve_ctm_edits_overlaps.py b/egs/wsj/s5/steps/cleanup/internal/resolve_ctm_edits_overlaps.py index ad03b557bfe..1dae735304f 100755 --- a/egs/wsj/s5/steps/cleanup/internal/resolve_ctm_edits_overlaps.py +++ b/egs/wsj/s5/steps/cleanup/internal/resolve_ctm_edits_overlaps.py @@ -15,6 +15,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import collections import logging @@ -228,7 +229,7 @@ def resolve_overlaps(ctm_edits, segments): try: cur_utt_end_index = next( (i for i, line in enumerate(ctm_edits_for_cur_utt) - if line[2] + line[3] / 2.0 > window_length - overlap)) + if line[2] + line[3] / 2.0 > window_length - overlap)) except StopIteration: cur_utt_end_index = len(ctm_edits_for_cur_utt) @@ -299,7 +300,7 @@ def run(args): segments, reco2utt = read_segments(args.segments) ctm_edits = read_ctm_edits(args.ctm_edits_in, segments) - for reco, utts in reco2utt.iteritems(): + for reco, utts in reco2utt.items(): ctm_edits_for_reco = [] for utt in sorted(utts, key=lambda x: segments[x][1]): if (reco, utt) in ctm_edits: diff --git a/egs/wsj/s5/steps/cleanup/internal/retrieve_similar_docs.py b/egs/wsj/s5/steps/cleanup/internal/retrieve_similar_docs.py index eb0b18f0408..9594d2ecc60 100755 --- a/egs/wsj/s5/steps/cleanup/internal/retrieve_similar_docs.py +++ b/egs/wsj/s5/steps/cleanup/internal/retrieve_similar_docs.py @@ -223,7 +223,7 @@ def read_map(file_handle, num_values_per_key=None, def get_document_ids(source_docs, indexes): indexes = sorted( - [(key, value[0], value[1]) for key, value in indexes.iteritems()], + [(key, value[0], value[1]) for key, value in indexes.items()], key=lambda x: x[0]) doc_ids = [] @@ -273,7 +273,7 @@ def run(args): "Did not get scores for query {0}".format(query_id)) if args.verbose > 2: - for tup, score in scores.iteritems(): + for tup, score in scores.items(): logger.debug("Score, {num}: {0} {1} {2}".format( tup[0], tup[1], score, num=num_queries)) diff --git a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py index 39f6d38d6bf..39d6cb6ed80 100755 --- a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits.py @@ -5,6 +5,7 @@ # Apache 2.0 from __future__ import print_function +from __future__ import division import sys, operator, argparse, os from collections import defaultdict @@ -171,7 +172,7 @@ def ComputeSegmentCores(split_lines_of_utt): return segment_ranges -class Segment: +class Segment(object): def __init__(self, split_lines_of_utt, start_index, end_index, debug_str = None): self.split_lines_of_utt = split_lines_of_utt # start_index is the index of the first line that appears in this @@ -551,7 +552,7 @@ def PossiblyTruncateStartForJunkProportion(self): if candidate_start_index is None: return # Nothing to do as there is no place to split. candidate_removed_piece_duration = candidate_start_time - self.StartTime() - if begin_junk_duration / candidate_removed_piece_duration < args.max_junk_proportion: + if float(begin_junk_duration) / candidate_removed_piece_duration < args.max_junk_proportion: return # Nothing to do as the candidate piece to remove has too # little junk. # OK, remove the piece. @@ -593,7 +594,7 @@ def PossiblyTruncateEndForJunkProportion(self): if candidate_end_index is None: return # Nothing to do as there is no place to split.
candidate_removed_piece_duration = self.EndTime() - candidate_end_time - if end_junk_duration / candidate_removed_piece_duration < args.max_junk_proportion: + if float(end_junk_duration) / candidate_removed_piece_duration < args.max_junk_proportion: return # Nothing to do as the candidate piece to remove has too # little junk. # OK, remove the piece. @@ -807,7 +808,7 @@ def TimeToString(time, frame_length): def WriteSegmentsForUtterance(text_output_handle, segments_output_handle, old_utterance_name, segments): - num_digits = len(str(len(segments))) + num_digits = len('{}'.format(len(segments))) for n in range(len(segments)): segment = segments[n] # split utterances will be named foo-bar-1 foo-bar-2, etc. @@ -840,24 +841,24 @@ def PrintDebugInfoForUtterance(ctm_edits_out_handle, info_to_print = [] for n in range(len(segments_for_utterance)): segment = segments_for_utterance[n] - start_string = 'start-segment-' + str(n+1) + '[' + segment.DebugInfo() + ']' + start_string = 'start-segment-{0}[{1}]'.format(n+1, segment.DebugInfo()) info_to_print.append( (segment.StartTime(), start_string) ) - end_string = 'end-segment-' + str(n+1) + end_string = 'end-segment-{}'.format(n+1) info_to_print.append( (segment.EndTime(), end_string) ) # for segments that were deleted we print info like start-deleted-segment-1, and # otherwise similar info to segments that were retained. for n in range(len(deleted_segments_for_utterance)): segment = deleted_segments_for_utterance[n] - start_string = 'start-deleted-segment-' + str(n+1) + '[' + segment.DebugInfo() + ']' + start_string = 'start-deleted-segment-{0}[{1}]'.format(n+1, segment.DebugInfo()) info_to_print.append( (segment.StartTime(), start_string) ) - end_string = 'end-deleted-segment-' + str(n+1) + end_string = 'end-deleted-segment-{}'.format(n+1) info_to_print.append( (segment.EndTime(), end_string) ) info_to_print = sorted(info_to_print) for i in range(len(split_lines_of_cur_utterance)): split_line=split_lines_of_cur_utterance[i] - split_line[0] += '[' + str(i) + ']' # add an index like [0], [1], to + split_line[0] += '[{}]'.format(i) # add an index like [0], [1], to # the utterance-id so we can easily # look up segment indexes. start_time = float(split_line[2]) diff --git a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits_mild.py b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits_mild.py index 46a9369ae98..9fcc2e89360 100755 --- a/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits_mild.py +++ b/egs/wsj/s5/steps/cleanup/internal/segment_ctm_edits_mild.py @@ -5,6 +5,7 @@ # Apache 2.0 from __future__ import print_function +from __future__ import division import argparse import copy import logging @@ -869,8 +870,7 @@ def relax_boundary_truncation(self, min_segment_length, # a * (length_with_truncation - length_with_relaxed_boundaries) # -> a = (length_cutoff - length_with_relaxed_boundaries) # / (length_with_truncation - length_with_relaxed_boundaries) - a = ((length_cutoff - length_with_relaxed_boundaries) - / (length_with_truncation - length_with_relaxed_boundaries)) + a = (length_cutoff - length_with_relaxed_boundaries) / (length_with_truncation - length_with_relaxed_boundaries) if a < 0.0 or a > 1.0: # TODO(vimal): Should this be an error? _global_logger.warn("bad 'a' value = %.4f", a) @@ -1756,7 +1756,7 @@ def time_to_string(time, frame_length): """ Gives time in string form as an exact multiple of the frame-length, e.g. 0.01 (after rounding). 
""" - n = round(time / frame_length) + n = round(time /frame_length) assert n >= 0 # The next function call will remove trailing zeros while printing it, so # that e.g. 0.01 will be printed as 0.01 and not 0.0099999999999999. It diff --git a/egs/wsj/s5/steps/cleanup/internal/taint_ctm_edits.py b/egs/wsj/s5/steps/cleanup/internal/taint_ctm_edits.py index 85e1df997a7..4e0e1ae2283 100755 --- a/egs/wsj/s5/steps/cleanup/internal/taint_ctm_edits.py +++ b/egs/wsj/s5/steps/cleanup/internal/taint_ctm_edits.py @@ -201,7 +201,7 @@ def PrintNonScoredStats(): percent_modified, percent_of_incorrect_modified), file = sys.stderr) - keys = sorted(ref_change_stats.keys(), reverse=True, + keys = sorted(list(ref_change_stats.keys()), reverse=True, key = lambda x: ref_change_stats[x]) num_keys_to_print = 40 if args.verbose >= 2 else 10 @@ -219,7 +219,7 @@ def PrintStats(): return print("taint_ctm_edits.py: processed {0} input lines, whose edit-types were: ".format(tot_lines) + ', '.join([ '%s = %.2f%%' % (k, num_lines_of_type[k] * 100.0 / tot_lines) - for k in sorted(num_lines_of_type.keys(), reverse = True, + for k in sorted(list(num_lines_of_type.keys()), reverse = True, key = lambda k: num_lines_of_type[k]) ]), file = sys.stderr) diff --git a/egs/wsj/s5/steps/cleanup/internal/tf_idf.py b/egs/wsj/s5/steps/cleanup/internal/tf_idf.py index 9b2f4d693a6..a098d9f2a44 100644 --- a/egs/wsj/s5/steps/cleanup/internal/tf_idf.py +++ b/egs/wsj/s5/steps/cleanup/internal/tf_idf.py @@ -6,6 +6,7 @@ """ from __future__ import print_function +from __future__ import division import logging import math import re @@ -51,8 +52,7 @@ def get_inverse_document_frequency(self, term, weighting_scheme="log"): if weighting_scheme == "log-smoothed": return math.log(1.0 + float(self.num_docs) / (1.0 + n_t)) if weighting_scheme == "probabilitic": - return math.log((self.num_docs - n_t - 1) - / (1.0 + n_t)) + return math.log((self.num_docs - n_t - 1) / (1.0 + n_t)) def accumulate(self, term): """Adds one count to the number of docs containing the term "term". @@ -66,7 +66,7 @@ def write(self, file_handle): ... for n-gram (, ... ) """ - for term, num in self.num_docs_for_term.iteritems(): + for term, num in self.num_docs_for_term.items(): if num == 0: continue assert isinstance(term, tuple) @@ -135,7 +135,7 @@ def compute_term_stats(self, idf_stats=None): based on the stored raw counts.""" if len(self.raw_counts) == 0: raise RuntimeError("No (term, doc) found in tf-stats.") - for tup, counts in self.raw_counts.iteritems(): + for tup, counts in self.raw_counts.items(): term = tup[0] if counts > self.max_counts_for_term.get(term, 0): @@ -149,7 +149,7 @@ def __str__(self): ... 
""" lines = [] - for tup, counts in self.raw_counts.iteritems(): + for tup, counts in self.raw_counts.items(): term, doc = tup lines.append("{order} {term} {doc} {counts}".format( order=len(term), term=" ".join(term), @@ -225,7 +225,7 @@ def compute_similarity_scores(self, source_tfidf, source_docs=None, num_terms_per_doc = {} similarity_scores = {} - for tup, value in self.tf_idf.iteritems(): + for tup, value in self.tf_idf.items(): term, doc = tup num_terms_per_doc[doc] = num_terms_per_doc.get(doc, 0) + 1 @@ -253,19 +253,18 @@ def compute_similarity_scores(self, source_tfidf, source_docs=None, similarity_scores.get((doc, src_doc), 0) + src_value * value) else: - for src_tup, src_value in source_tfidf.tf_idf.iteritems(): + for src_tup, src_value in source_tfidf.tf_idf.items(): similarity_scores[(doc, src_doc)] = ( similarity_scores.get((doc, src_doc), 0) + src_value * value) if do_length_normalization: - for doc_pair, value in similarity_scores.iteritems(): + for doc_pair, value in similarity_scores.items(): doc, src_doc = doc_pair - similarity_scores[(doc, src_doc)] = (value - / num_terms_per_doc[doc]) + similarity_scores[(doc, src_doc)] = value / num_terms_per_doc[doc] if logger.isEnabledFor(logging.DEBUG): - for doc, count in num_terms_per_doc.iteritems(): + for doc, count in num_terms_per_doc.items(): logger.debug( 'Seen {0} terms in query document {1}'.format(count, doc)) @@ -329,7 +328,7 @@ def write(self, tf_idf_file): """Writes TFIDF object to file.""" print ("", file=tf_idf_file) - for tup, value in self.tf_idf.iteritems(): + for tup, value in self.tf_idf.items(): term, doc = tup print("{order} {term} {doc} {tfidf}".format( order=len(term), term=" ".join(term), diff --git a/egs/wsj/s5/steps/conf/append_eval_to_ctm.py b/egs/wsj/s5/steps/conf/append_eval_to_ctm.py index f8e2aad891d..90679d2b341 100755 --- a/egs/wsj/s5/steps/conf/append_eval_to_ctm.py +++ b/egs/wsj/s5/steps/conf/append_eval_to_ctm.py @@ -3,6 +3,7 @@ # Copyright 2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 +from __future__ import print_function import sys,operator # Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM': @@ -15,7 +16,7 @@ # 'U' = unknown (not part of scored segment) if len(sys.argv) != 4: - print 'Usage: %s eval-in ctm-in ctm-eval-out' % __file__ + print('Usage: %s eval-in ctm-in ctm-eval-out' % __file__) sys.exit(1) dummy, eval_in, ctm_in, ctm_eval_out = sys.argv @@ -54,7 +55,7 @@ # Build the 'ctm' with 'eval' column added, ctm_eval = [] -for utt,ctm_part in ctm.iteritems(): +for utt,ctm_part in ctm.items(): ctm_part.sort(key = operator.itemgetter(2)) # Sort by 'beg' time, try: # merging 'tuples' by '+', the record has format: @@ -69,7 +70,7 @@ # append, ctm_eval.extend(merged) except KeyError: - print 'Missing key', utt, 'in the word-evaluation stats from scoring' + print('Missing key', utt, 'in the word-evaluation stats from scoring') # Sort again, ctm_eval.sort(key = operator.itemgetter(0,1,2)) diff --git a/egs/wsj/s5/steps/conf/append_prf_to_ctm.py b/egs/wsj/s5/steps/conf/append_prf_to_ctm.py index 547b6176c9f..42acc5e22b7 100755 --- a/egs/wsj/s5/steps/conf/append_prf_to_ctm.py +++ b/egs/wsj/s5/steps/conf/append_prf_to_ctm.py @@ -3,6 +3,7 @@ # Copyright 2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 +from __future__ import print_function import sys # Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM': @@ -16,7 +17,7 @@ # Parse options, if len(sys.argv) != 4: - print "Usage: %s prf ctm_in ctm_out" % __file__ + 
print("Usage: %s prf ctm_in ctm_out" % __file__) sys.exit(1) prf_file, ctm_file, ctm_out_file = sys.argv[1:] diff --git a/egs/wsj/s5/steps/conf/convert_ctm_to_tra.py b/egs/wsj/s5/steps/conf/convert_ctm_to_tra.py index 8fec0064fd7..25899e19264 100755 --- a/egs/wsj/s5/steps/conf/convert_ctm_to_tra.py +++ b/egs/wsj/s5/steps/conf/convert_ctm_to_tra.py @@ -3,6 +3,7 @@ # Copyright 2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 +from __future__ import print_function import sys, operator # This scripts loads a 'ctm' file and converts it into the 'tra' format: @@ -14,7 +15,7 @@ # - confidences if len(sys.argv) != 3: - print 'Usage: %s ctm-in tra-out' % __file__ + print('Usage: %s ctm-in tra-out' % __file__) sys.exit(1) dummy, ctm_in, tra_out = sys.argv @@ -31,7 +32,7 @@ # Store the in 'tra' format, with open(tra_out,'w') as f: - for utt,tuples in tra.iteritems(): + for utt,tuples in tra.items(): tuples.sort(key = operator.itemgetter(0)) # Sort by 'beg' time, f.write('%s %s\n' % (utt,' '.join([t[1] for t in tuples]))) diff --git a/egs/wsj/s5/steps/conf/parse_arpa_unigrams.py b/egs/wsj/s5/steps/conf/parse_arpa_unigrams.py index 1be32d4c4d7..f0a2fe13497 100755 --- a/egs/wsj/s5/steps/conf/parse_arpa_unigrams.py +++ b/egs/wsj/s5/steps/conf/parse_arpa_unigrams.py @@ -3,11 +3,12 @@ # Copyright 2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 +from __future__ import print_function import sys, gzip, re # Parse options, if len(sys.argv) != 4: - print "Usage: %s " % __file__ + print("Usage: %s " % __file__) sys.exit(0) words_txt, arpa_gz, unigrams_out = sys.argv[1:] @@ -31,7 +32,7 @@ # Create list, 'wrd id log_p_unigram', words_unigram = [[wrd, id, (wrd_log10[wrd] if wrd in wrd_log10 else -99)] for wrd,id in words ] -print >>sys.stderr, words_unigram[0] +print(words_unigram[0], file=sys.stderr) # Store, with open(unigrams_out,'w') as f: f.writelines(['%s %s %g\n' % (w,i,p) for (w,i,p) in words_unigram]) diff --git a/egs/wsj/s5/steps/conf/prepare_calibration_data.py b/egs/wsj/s5/steps/conf/prepare_calibration_data.py index bc8f92a2f7f..c4da720ba71 100755 --- a/egs/wsj/s5/steps/conf/prepare_calibration_data.py +++ b/egs/wsj/s5/steps/conf/prepare_calibration_data.py @@ -3,6 +3,7 @@ # Copyright 2015 Brno University of Technology (author: Karel Vesely) # Apache 2.0 +from __future__ import division import sys, math from optparse import OptionParser @@ -82,7 +83,7 @@ depths = dict() for l in open(o.lattice_depth): utt,d = l.split(' ',1) - depths[utt] = map(int,d.split()) + depths[utt] = [int(i) for i in d.split()] # Load the 'word_categories' mapping for categorical input features derived from 'lang/words.txt', wrd_to_cat = [ l.split() for l in open(word_categories_file) ] diff --git a/egs/wsj/s5/steps/data/augment_data_dir.py b/egs/wsj/s5/steps/data/augment_data_dir.py index 432b136e3b1..7edcdda2636 100755 --- a/egs/wsj/s5/steps/data/augment_data_dir.py +++ b/egs/wsj/s5/steps/data/augment_data_dir.py @@ -103,8 +103,8 @@ def AugmentWav(utt, wav, dur, fg_snr_opts, bg_snr_opts, fg_noise_utts, \ tot_noise_dur += noise_dur + interval noises.append(noise) - start_times_str = "--start-times='" + ",".join(list(map(str,start_times))) + "'" - snrs_str = "--snrs='" + ",".join(list(map(str,snrs))) + "'" + start_times_str = "--start-times='" + ",".join([str(i) for i in start_times]) + "'" + snrs_str = "--snrs='" + ",".join([str(i) for i in snrs]) + "'" noises_str = "--additive-signals='" + ",".join(noises).strip() + "'" # If the wav is just a file @@ -130,11 +130,11 @@ def 
CopyFileIfExists(utt_suffix, filename, input_dir, output_dir): def main(): args = GetArgs() - fg_snrs = list(map(int, args.fg_snr_str.split(":"))) - bg_snrs = list(map(int, args.bg_snr_str.split(":"))) + fg_snrs = [int(i) for i in args.fg_snr_str.split(":")] + bg_snrs = [int(i) for i in args.bg_snr_str.split(":")] input_dir = args.input_dir output_dir = args.output_dir - num_bg_noises = list(map(int, args.num_bg_noises.split(":"))) + num_bg_noises = [int(i) for i in args.num_bg_noises.split(":")] reco2dur = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0])) wav_scp_file = open(input_dir + "/wav.scp", 'r').readlines() diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 570613855a0..189f4619ddb 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -5,7 +5,6 @@ # script to generate reverberated data # we're using python 3.x style print but want it to work in python 2.x, -from __future__ import print_function import argparse, shlex, glob, math, os, random, sys, warnings, copy, imp, ast data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py') @@ -121,17 +120,18 @@ def CheckArgs(args): return args -class list_cyclic_iterator: +class list_cyclic_iterator(object): def __init__(self, list): self.list_index = 0 self.list = list random.shuffle(self.list) - def next(self): + def __next__(self): item = self.list[self.list_index] self.list_index = (self.list_index + 1) % len(self.list) return item + next = __next__ # for Python 2 # This functions picks an item from the collection according to the associated probability distribution. # The probability estimate of each item in the collection is stored in the "probability" field of @@ -218,11 +218,11 @@ def AddPointSourceNoise(noise_addition_descriptor, # descriptor to store the in if noise.bg_fg_type == "background": noise_rvb_command = """wav-reverberate --impulse-response="{0}" --duration={1}""".format(noise_rir.rir_rspecifier, speech_dur) noise_addition_descriptor['start_times'].append(0) - noise_addition_descriptor['snrs'].append(background_snrs.next()) + noise_addition_descriptor['snrs'].append(next(background_snrs)) else: noise_rvb_command = """wav-reverberate --impulse-response="{0}" """.format(noise_rir.rir_rspecifier) noise_addition_descriptor['start_times'].append(round(random.random() * speech_dur, 2)) - noise_addition_descriptor['snrs'].append(foreground_snrs.next()) + noise_addition_descriptor['snrs'].append(next(foreground_snrs)) # check if the rspecifier is a pipe or not if len(noise.noise_rspecifier.split()) == 1: @@ -273,7 +273,7 @@ def GenerateReverberationOpts(room_dict, # the room dictionary, please refer to else: noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) noise_addition_descriptor['start_times'].append(0) - noise_addition_descriptor['snrs'].append(background_snrs.next()) + noise_addition_descriptor['snrs'].append(next(background_snrs)) noise_addition_descriptor = AddPointSourceNoise(noise_addition_descriptor, # descriptor to store the information of the noise added room, # the room selected diff --git a/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py b/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py index 56b9f69b3c9..6ed2bf78115 100755 --- a/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py +++ 
b/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py @@ -5,6 +5,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division import argparse import sys, os from collections import defaultdict diff --git a/egs/wsj/s5/steps/dict/apply_lexicon_edits.py b/egs/wsj/s5/steps/dict/apply_lexicon_edits.py index a5bdbc30d46..f8568971fb7 100755 --- a/egs/wsj/s5/steps/dict/apply_lexicon_edits.py +++ b/egs/wsj/s5/steps/dict/apply_lexicon_edits.py @@ -10,7 +10,7 @@ def GetArgs(): parser = argparse.ArgumentParser(description = "Apply an lexicon edits file (output from steps/dict/select_prons_bayesian.py)to an input lexicon" "to produce a learned lexicon.", - epilog = "See steps/dict/learn_lexicon.sh for example") + epilog = "See steps/dict/learn_lexicon_greedy.sh for example") parser.add_argument("in_lexicon", metavar='', type = str, help = "Input lexicon. Each line must be .") diff --git a/egs/wsj/s5/steps/dict/get_pron_stats.py b/egs/wsj/s5/steps/dict/get_pron_stats.py index b5202a69abb..e8106bdd1ac 100755 --- a/egs/wsj/s5/steps/dict/get_pron_stats.py +++ b/egs/wsj/s5/steps/dict/get_pron_stats.py @@ -10,15 +10,16 @@ import sys def GetArgs(): - parser = argparse.ArgumentParser(description = "Accumulate statistics from lattice-alignment outputs for lexicon" - "learning. The inputs are a file containing arc level information from lattice-align-words," - "and a map which maps word-position-dependent phones to word-position-independent phones" - "(output from steps/cleanup/debug_lexicon.txt). The output contains accumulated soft-counts" - "of pronunciations", - epilog = "cat exp/tri3_lex_0.4_work/lats/arc_info_sym.*.txt \\|" - " steps/dict/get_pron_stats.py - exp/tri3_lex_0.4_work/phone_decode/phone_map.txt \\" - " exp/tri3_lex_0.4_work/lats/pron_stats.txt" - "See steps/dict/learn_lexicon.sh for examples in detail.") + parser = argparse.ArgumentParser( + description = "Accumulate statistics from lattice-alignment outputs for lexicon" + "learning. The inputs are a file containing arc level information from lattice-align-words," + "and a map which maps word-position-dependent phones to word-position-independent phones" + "(output from steps/cleanup/debug_lexicon.txt). 
The output contains accumulated soft-counts" + "of pronunciations", + epilog = "cat exp/tri3_lex_0.4_work/lats/arc_info_sym.*.txt \\|" + " steps/dict/get_pron_stats.py - exp/tri3_lex_0.4_work/phone_decode/phone_map.txt \\" + " exp/tri3_lex_0.4_work/lats/pron_stats.txt" + "See steps/dict/learn_lexicon_greedy.sh for examples in detail.") parser.add_argument("arc_info_file", metavar = "", type = str, help = "Input file containing per arc statistics; " @@ -75,14 +76,14 @@ def GetStatsFromArcInfo(arc_info_file_handle, phone_map_handle): prons[word].add(phones) stats_unmapped[(word, phones)] = stats_unmapped.get((word, phones), 0) + count - for word_pron, count in stats_unmapped.iteritems(): + for word_pron, count in stats_unmapped.items(): phones_unmapped = word_pron[1].split() phones = [phone_map[phone] for phone in phones_unmapped] stats[(word_pron[0], " ".join(phones))] = count return stats def WriteStats(stats, file_handle): - for word_pron, count in stats.iteritems(): + for word_pron, count in stats.items(): print('{2} {0} {1}'.format(word_pron[0], word_pron[1], count), file=file_handle) file_handle.close() diff --git a/egs/wsj/s5/steps/dict/internal/get_subsegments.py b/egs/wsj/s5/steps/dict/internal/get_subsegments.py new file mode 100755 index 00000000000..c431b4c7066 --- /dev/null +++ b/egs/wsj/s5/steps/dict/internal/get_subsegments.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python + +# Copyright 2018 Xiaohui Zhang +# Apache 2.0. + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import argparse +import sys +import string + +def GetArgs(): + parser = argparse.ArgumentParser( + description = "The purpose of this script is to use a ctm and a vocab file" + "to extract sub-utterances and a sub-segmentation. Extracted sub-utterances" + "are all the strings of consecutive in-vocab words from the ctm" + "surrounded by an out-of-vocab word at each end if present.", + epilog = "e.g. steps/dict/internal/get_subsegments.py exp/tri3_lex_0.4_work/phonetic_decoding/word.ctm \\" + "exp/tri3_lex_0.4_work/learn_vocab.txt exp/tri3_lex_0.4_work/resegmentation/subsegments \\" + "exp/tri3_lex_0.4_work/resegmentation/text" + "See steps/dict/learn_lexicon_greedy.sh for an example.") + + parser.add_argument("ctm", metavar='', type = str, + help = "Input ctm file." + "each line must be ") + parser.add_argument("vocab", metavar='', type = str, + help = "Vocab file." + "each line must be ") + parser.add_argument("subsegment", metavar='', type = str, + help = "Subsegment file. Each line is in format:" + " ") + parser.add_argument("text", metavar='', type = str, + help = "Text file. Each line is in format:" + " ... 
.") + + print (' '.join(sys.argv), file = sys.stderr) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if args.ctm == "-": + args.ctm_handle = sys.stdin + else: + args.ctm_handle = open(args.ctm) + + if args.vocab is not '': + if args.vocab == "-": + args.vocab_handle = sys.stdout + else: + args.vocab_handle = open(args.vocab) + + args.subsegment_handle = open(args.subsegment, 'w') + args.text_handle = open(args.text, 'w') + + return args + +def GetSubsegments(args, vocab): + sub_utt = list() + last_is_oov = False + is_oov = False + utt_id_last = None + start_times = {} + end_times = {} + sub_utts = {} + sub_utt_id = 1 + sub_utt_id_last = 1 + end_time_last = 0.0 + for line in args.ctm_handle: + splits = line.strip().split() + if len(splits) < 5: + raise Exception("problematic line",line) + + utt_id = splits[0] + start = float(splits[2]) + dur = float(splits[3]) + word = splits[4] + if utt_id != utt_id_last: + sub_utt_id = 1 + if len(sub_utt)>1: + sub_utts[utt_id_last+'-'+str(sub_utt_id_last)] = (utt_id_last, sub_utt) + end_times[utt_id_last+'-'+str(sub_utt_id_last)] = ent_time_last + sub_utt = [] + start_times[utt_id+'-'+str(sub_utt_id)] = start + is_oov_last = False + if word == '': + is_oov = True + end_times[utt_id+'-'+str(sub_utt_id)] = start + dur + elif word in vocab: + is_oov = True + sub_utt.append(word) + end_times[utt_id+'-'+str(sub_utt_id)] = start + dur + else: + is_oov = False + if is_oov_last == True: + sub_utt.append(word) + sub_utts[utt_id+'-'+str(sub_utt_id_last)] = (utt_id, sub_utt) + end_times[utt_id+'-'+str(sub_utt_id_last)] = start + dur + sub_utt_id += 1 + sub_utt = [word] + start_times[utt_id+'-'+str(sub_utt_id)] = start + utt_id_last = utt_id + sub_utt_id_last = sub_utt_id + is_oov_last = is_oov + ent_time_last = start + dur + + if is_oov: + if word != '': + sub_utt.append(word) + sub_utts[utt_id+'-'+str(sub_utt_id_last)] = (utt_id, sub_utt) + end_times[utt_id+'-'+str(sub_utt_id_last)] = start + dur + + for utt,v in sorted(sub_utts.items()): + print(utt, ' '.join(sub_utts[utt][1]), file=args.text_handle) + print(utt, sub_utts[utt][0], start_times[utt], end_times[utt], file=args.subsegment_handle) + +def ReadVocab(vocab_file_handle): + vocab = set() + if vocab_file_handle: + for line in vocab_file_handle.readlines(): + splits = line.strip().split() + if len(splits) == 0: + continue + if len(splits) > 1: + raise Exception('Invalid format of line ' + line + + ' in vocab file.') + word = splits[0] + vocab.add(word) + return vocab + +def Main(): + args = GetArgs() + + vocab = ReadVocab(args.vocab_handle) + GetSubsegments(args, vocab) + +if __name__ == "__main__": + Main() diff --git a/egs/wsj/s5/steps/dict/internal/prune_pron_candidates.py b/egs/wsj/s5/steps/dict/internal/prune_pron_candidates.py index 1f2863424f3..60c7f75bbe8 100755 --- a/egs/wsj/s5/steps/dict/internal/prune_pron_candidates.py +++ b/egs/wsj/s5/steps/dict/internal/prune_pron_candidates.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Xiaohui Zhang +# Copyright 2018 Xiaohui Zhang # Apache 2.0. from __future__ import print_function @@ -10,27 +10,36 @@ import math def GetArgs(): - parser = argparse.ArgumentParser(description = "Prune pronunciation candidates based on soft-counts from lattice-alignment" - "outputs, and a reference lexicon. 
Basically, for each word we sort all pronunciation" - "cadidates according to their soft-counts, and then select the top r * N candidates" - "(For words in the reference lexicon, N = # pron variants given by the reference" - "lexicon; For oov words, N = avg. # pron variants per word in the reference lexicon)." - "r is a user-specified constant, like 2.", - epilog = "See steps/dict/learn_lexicon.sh for example") - - parser.add_argument("--r", type = float, default = "2.0", - help = "a user-specified ratio parameter which determines how many" - "pronunciation candidates we want to keep for each word.") + parser = argparse.ArgumentParser( + description = "Prune pronunciation candidates based on soft-counts from lattice-alignment" + "outputs, and a reference lexicon. Basically, for each word we sort all pronunciation" + "cadidates according to their soft-counts, and then select the top variant-counts-ratio * N candidates" + "(For words in the reference lexicon, N = # pron variants given by the reference" + "lexicon; For oov words, N = avg. # pron variants per word in the reference lexicon).", + epilog = "See steps/dict/learn_lexicon_greedy.sh for example") + + parser.add_argument("--variant-counts-ratio", type = float, default = "3.0", + help = "A user-specified ratio parameter which determines how many" + "pronunciation candidates we want to keep for each word at most.") parser.add_argument("pron_stats", metavar = "", type = str, - help = "File containing soft-counts of all pronounciation candidates; " + help = "File containing soft-counts of pronounciation candidates; " "each line must be ") + parser.add_argument("lexicon_phonetic_decoding", metavar = "", type = str, + help = "Lexicon containing pronunciation candidates from phonetic decoding." + "each line must be ") + parser.add_argument("lexiconp_g2p", metavar = "", type = str, + help = "Lexicon with probabilities for pronunciation candidates from G2P." + "each line must be ") parser.add_argument("ref_lexicon", metavar = "", type = str, help = "Reference lexicon file, where we obtain # pron variants for" "each word, based on which we prune the pron candidates." "Each line must be ") - parser.add_argument("pruned_prons", metavar = "", type = str, - help = "An output file in lexicon format, which contains prons we want to" - "prune off from the pron_stats file.") + parser.add_argument("lexicon_phonetic_decoding_pruned", metavar = "", type = str, + help = "Output lexicon containing pronunciation candidates from phonetic decoding after pruning." + "each line must be ") + parser.add_argument("lexicon_g2p_pruned", metavar = "", type = str, + help = "Output lexicon containing pronunciation candidates from G2P after pruning." 
+ "each line must be ") print (' '.join(sys.argv), file=sys.stderr) @@ -40,12 +49,13 @@ def GetArgs(): return args def CheckArgs(args): + print(args) args.pron_stats_handle = open(args.pron_stats) + args.lexicon_phonetic_decoding_handle = open(args.lexicon_phonetic_decoding) + args.lexiconp_g2p_handle = open(args.lexiconp_g2p) args.ref_lexicon_handle = open(args.ref_lexicon) - if args.pruned_prons == "-": - args.pruned_prons_handle = sys.stdout - else: - args.pruned_prons_handle = open(args.pruned_prons, "w") + args.lexicon_phonetic_decoding_pruned_handle = open(args.lexicon_phonetic_decoding_pruned, "w") + args.lexicon_g2p_pruned_handle = open(args.lexicon_g2p_pruned, "w") return args def ReadStats(pron_stats_handle): @@ -62,13 +72,11 @@ def ReadStats(pron_stats_handle): phones = ' '.join(splits[2:]) stats[word].append((phones, count)) - for word, entry in stats.iteritems(): - entry.sort(key=lambda x: x[1]) return stats -def ReadLexicon(ref_lexicon_handle): - ref_lexicon = defaultdict(set) - for line in ref_lexicon_handle.readlines(): +def ReadLexicon(lexicon_handle): + lexicon = defaultdict(set) + for line in lexicon_handle.readlines(): splits = line.strip().split() if len(splits) == 0: continue @@ -77,42 +85,74 @@ def ReadLexicon(ref_lexicon_handle): + ' in lexicon file.') word = splits[0] phones = ' '.join(splits[1:]) - ref_lexicon[word].add(phones) - return ref_lexicon + lexicon[word].add(phones) + return lexicon -def PruneProns(args, stats, ref_lexicon): +def ReadLexiconp(lexiconp_handle): + lexicon = defaultdict(set) + pron_probs = defaultdict(float) + for line in lexiconp_handle.readlines(): + splits = line.strip().split() + if len(splits) == 0: + continue + if len(splits) < 3: + raise Exception('Invalid format of line ' + line + + ' in lexicon file.') + word = splits[1] + prob = float(splits[0]) + phones = ' '.join(splits[2:]) + pron_probs[(word, phones)] = prob + lexicon[word].add(phones) + return lexicon, pron_probs + +def PruneProns(args, stats, ref_lexicon, lexicon_phonetic_decoding, lexicon_g2p, lexicon_g2p_probs): + # For those pron candidates from lexicon_phonetic_decoding/g2p which don't + # have stats, we append them to the "stats" dict, with a zero count. + for word, entry in stats.iteritems(): + prons_with_stats = set() + for (pron, count) in entry: + prons_with_stats.add(pron) + for pron in lexicon_g2p[word]: + if pron not in prons_with_stats: + entry.append((pron, lexicon_g2p_probs[(word, pron)]-1.0)) + entry.sort(key=lambda x: x[1]) + # Compute the average # pron variants counts per word in the reference lexicon. 
num_words_ref = 0 num_prons_ref = 0 for word, prons in ref_lexicon.iteritems(): num_words_ref += 1 num_prons_ref += len(prons) - avg_variants_counts_ref = math.ceil(float(num_prons_ref) / float(num_words_ref)) - + avg_variant_counts_ref = round(float(num_prons_ref) / float(num_words_ref)) for word, entry in stats.iteritems(): if word in ref_lexicon: - variants_counts = args.r * len(ref_lexicon[word]) + variant_counts = args.variant_counts_ratio * len(ref_lexicon[word]) else: - variants_counts = args.r * avg_variants_counts_ref + variant_counts = args.variant_counts_ratio * avg_variant_counts_ref num_variants = 0 - while num_variants < variants_counts: + count = 0.0 + while num_variants < variant_counts: try: - pron, prob = entry.pop() - if word not in ref_lexicon or pron not in ref_lexicon[word]: + pron, count = entry.pop() + if word in ref_lexicon and pron in ref_lexicon[word]: + continue + if pron in lexicon_phonetic_decoding[word]: + num_variants += 1 + print('{0} {1}'.format(word, pron), file=args.lexicon_phonetic_decoding_pruned_handle) + if pron in lexicon_g2p[word]: num_variants += 1 + print('{0} {1}'.format(word, pron), file=args.lexicon_g2p_pruned_handle) except IndexError: break - - for word, entry in stats.iteritems(): - for pron, prob in entry: - if word not in ref_lexicon or pron not in ref_lexicon[word]: - print('{0} {1}'.format(word, pron), file=args.pruned_prons_handle) def Main(): args = GetArgs() ref_lexicon = ReadLexicon(args.ref_lexicon_handle) + lexicon_phonetic_decoding = ReadLexicon(args.lexicon_phonetic_decoding_handle) + lexicon_g2p, lexicon_g2p_probs = ReadLexiconp(args.lexiconp_g2p_handle) stats = ReadStats(args.pron_stats_handle) - PruneProns(args, stats, ref_lexicon) + + PruneProns(args, stats, ref_lexicon, lexicon_phonetic_decoding, lexicon_g2p, lexicon_g2p_probs) if __name__ == "__main__": Main() diff --git a/egs/wsj/s5/steps/dict/internal/sum_arc_info.py b/egs/wsj/s5/steps/dict/internal/sum_arc_info.py new file mode 100755 index 00000000000..5f02bc5fc29 --- /dev/null +++ b/egs/wsj/s5/steps/dict/internal/sum_arc_info.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python + +# Copyright 2018 Xiaohui Zhang +# Apache 2.0 + +from __future__ import print_function +from collections import defaultdict +import argparse +import sys + +class StrToBoolAction(argparse.Action): + """ A custom action to convert bools from shell format i.e., true/false + to python format i.e., True/False """ + def __call__(self, parser, namespace, values, option_string=None): + if values == "true": + setattr(namespace, self.dest, True) + elif values == "false": + setattr(namespace, self.dest, False) + else: + raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) + + +def GetArgs(): + parser = argparse.ArgumentParser( + description = "Accumulate statistics from per arc lattice statitics" + "for lexicon learning", + epilog = "See steps/dict/learn_lexicon_greedy.sh for example") + + parser.add_argument("--set-sum-to-one", type = str, default = True, + action = StrToBoolAction, choices = ["true", "false"], + help = "If normalize posteriors such that the sum of " + "pronunciation posteriors of a word in an utterance is 1.") + parser.add_argument("arc_info_file", metavar = "", type = str, + help = "File containing per arc statistics; " + "each line must be " + "") + parser.add_argument("phone_map", metavar = "", type = str, + help = "An input phone map used to remove word boundary markers from phones;" + "generated in steps/cleanup/debug_lexicon.sh") + parser.add_argument("stats_file", 
metavar = "", type = str, + help = "Write accumulated statitistics to this file" + "each line is " + "") + + print (' '.join(sys.argv), file=sys.stderr) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if args.arc_info_file == "-": + args.arc_info_file_handle = sys.stdin + else: + args.arc_info_file_handle = open(args.arc_info_file) + + args.phone_map_handle = open(args.phone_map) + + if args.stats_file == "-": + args.stats_file_handle = sys.stdout + else: + args.stats_file_handle = open(args.stats_file, "w") + + return args + +def Main(): + args = GetArgs() + + lexicon = defaultdict(list) + prons = defaultdict(list) + start_frames = {} + stats = defaultdict(lambda : defaultdict(float)) + sum_tot = defaultdict(float) + + phone_map = {} + for line in args.phone_map_handle.readlines(): + splits = line.strip().split() + phone_map[splits[0]] = splits[1] + + for line in args.arc_info_file_handle.readlines(): + splits = line.strip().split() + + if (len(splits) == 0): + continue + + if (len(splits) < 6): + raise Exception('Invalid format of line ' + line + + ' in ' + args.arc_info_file) + + utt = splits[0] + start_frame = int(splits[1]) + word = splits[4] + count = float(splits[3]) + phones_unmapped = splits[5:] + phones = [phone_map[phone] for phone in phones_unmapped] + phones = ' '.join(phones) + overlap = False + if word == '': + continue + if (word, utt) not in start_frames: + start_frames[(word, utt)] = start_frame + + if (word, utt) in stats: + stats[word, utt][phones] = stats[word, utt].get(phones, 0) + count + else: + stats[(word, utt)][phones] = count + sum_tot[(word, utt)] += count + + if phones not in prons[word]: + prons[word].append(phones) + + for (word, utt) in stats: + count_sum = 0.0 + counts = dict() + for phones in stats[(word, utt)]: + count = stats[(word, utt)][phones] + count_sum += count + counts[phones] = count + # By default we normalize the pron posteriors of each word in each utterance, + # so that they sum up exactly to one. If a word occurs two times in a utterance, + # the effect of this operation is to average the posteriors of these two occurences + # so that there's only one "equivalent occurence" of this word in the utterance. + # However, this case should be extremely rare if the utterances are already + # short sub-utterances produced by steps/dict/internal/get_subsegments.py + for phones in stats[(word, utt)]: + count = counts[phones] / count_sum + print(word, utt, start_frames[(word, utt)], count, phones, file=args.stats_file_handle) + # # Diagnostics info implying incomplete arc_info or multiple occurences of a word in a utterance: + # if count_sum < 0.9 or count_sum > 1.1: + # print(word, utt, start_frame, count_sum, stats[word, utt], file=sys.stderr) + + args.stats_file_handle.close() + +if __name__ == "__main__": + Main() diff --git a/egs/wsj/s5/steps/dict/learn_lexicon.sh b/egs/wsj/s5/steps/dict/learn_lexicon_bayesian.sh similarity index 93% rename from egs/wsj/s5/steps/dict/learn_lexicon.sh rename to egs/wsj/s5/steps/dict/learn_lexicon_bayesian.sh index a719422b593..042f8f94da4 100755 --- a/egs/wsj/s5/steps/dict/learn_lexicon.sh +++ b/egs/wsj/s5/steps/dict/learn_lexicon_bayesian.sh @@ -36,6 +36,7 @@ oov_symbol= lexicon_g2p= min_prob=0.3 +variant_counts_ratio=8 variants_prob_mass=0.7 variants_prob_mass_ref=0.9 @@ -93,6 +94,10 @@ if [ $# -lt 6 ] || [ $# -gt 7 ]; then echo " --min-prob # The cut-off parameter used to select pronunciation candidates from phonetic" echo " # decoding. 
We remove pronunciations with probabilities less than this value" echo " # after normalizing the probs s.t. the max-prob is 1.0 for each word." + echo " --variant-counts-ratio # This ratio parameter determines the maximum number of pronunciation" + echo " # candidates we will keep for each word, after pruning according to lattice statistics from" + echo " # the first iteration of lattice generation. See steps/dict/internal/prune_pron_candidates.py" + echo " # for details." echo " --prior-mean # Mean of priors (summing up to 1) assigned to three exclusive pronunciation" echo " # source: reference lexicon, g2p, and phonetic decoding (used in the Bayesian" echo " # pronunciation selection procedure). We recommend setting a larger prior" @@ -150,17 +155,17 @@ if [ $stage -le 0 ]; then # Remove non-scored-words from the reference lexicon. awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' $dir/non_scored_words \ - $ref_dict/lexicon.txt | tr -s '\t' ' ' > $dir/ref_lexicon.txt + $ref_dict/lexicon.txt | tr -s '\t' ' ' | awk '$1=$1' > $dir/ref_lexicon.txt cat $dir/ref_lexicon.txt | awk '{print $1}' | sort | uniq > $dir/ref_vocab.txt awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' $dir/non_scored_words \ $target_vocab | sort | uniq > $dir/target_vocab.txt # From the reference lexicon, we estimate the target_num_prons_per_word as, - # ceiling(avg. # prons per word in the reference lexicon). This'll be used as + # round(avg. # prons per word in the reference lexicon). This'll be used as # the upper bound of # pron variants per word when we apply G2P or select prons to # construct the learned lexicon in later stages. - python -c 'import sys; import math; print int(math.ceil(float(sys.argv[1])/float(sys.argv[2])))' \ + python -c 'import sys; import math; print int(round(float(sys.argv[1])/float(sys.argv[2])))' \ `wc -l $dir/ref_lexicon.txt | awk '{print $1}'` `wc -l $dir/ref_vocab.txt | awk '{print $1}'` \ > $dir/target_num_prons_per_word || exit 1; @@ -225,10 +230,11 @@ if [ $stage -le 2 ]; then # Get the oov words list (w.r.t ref vocab) which are in training data. awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/ref_lexicon.txt \ - $dir/train_counts.txt | sort > $dir/oov_train.txt + $dir/train_counts.txt | awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' \ + $dir/non_scored_words - | sort > $dir/oov_train.txt || exit 1; awk 'NR==FNR{a[$1] = 1; next} {if(($1 in a)) b+=$2; else c+=$2} END{print c/(b+c)}' \ - $dir/ref_vocab.txt $dir/train_counts.txt > $dir/train_oov_rate + $dir/ref_vocab.txt $dir/train_counts.txt > $dir/train_oov_rate || exit 1; echo "OOV rate (w.r.t. the reference lexicon) of the acoustic training data is:" cat $dir/train_oov_rate @@ -237,14 +243,14 @@ if [ $stage -le 2 ]; then # cannot be found in lexicon_g2p, we simply assign oov_symbol's pronunciaiton # (like NSN) to them, in order to get phonetic decoding pron candidates for them later on. awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/oov_train.txt \ - $dir/lexicon_g2p.txt > $dir/g2p_prons_for_oov_train.txt + $dir/lexicon_g2p.txt > $dir/g2p_prons_for_oov_train.txt || exit 1; # Get the pronunciation of oov_symbol. - oov_pron=`cat $dir/non_scored_entries | grep $oov_symbol | cut -f2- -d' '` + oov_pron=`cat $dir/non_scored_entries | grep $oov_symbol | awk '{print $2}'` # For oov words in training data for which we don't even have G2P pron candidates, # we simply assign them the pronunciation of the oov symbol (like ). 
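The one-liners in this stage compute two quantities that are easy to miss in awk form: the target number of prons per word and the training-data OOV rate. A rough Python equivalent on toy data, shown here only as a reading aid (variable names are illustrative):

ref_lexicon_lines = ['the DH AH', 'the DH IY', 'cat K AE T']   # toy ref_lexicon.txt
train_counts = {'the': 100, 'cat': 7, 'kaldi': 3}              # toy train_counts.txt

ref_vocab = set(line.split()[0] for line in ref_lexicon_lines)
# target_num_prons_per_word = round(avg. number of prons per word in the ref lexicon)
target_num_prons_per_word = int(round(float(len(ref_lexicon_lines)) / len(ref_vocab)))

# train_oov_rate = count mass of words outside the reference vocab / total count mass
oov_mass = sum(c for w, c in train_counts.items() if w not in ref_vocab)
train_oov_rate = oov_mass / float(sum(train_counts.values()))
print(target_num_prons_per_word, train_oov_rate)   # 2 0.0272727...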
awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/g2p_prons_for_oov_train.txt \ - $dir/oov_train.txt | awk -v op=$oov_pron '{print $0" "op}' > $dir/oov_train_no_pron.txt + $dir/oov_train.txt | awk -v op="$oov_pron" '{print $0" "op}' > $dir/oov_train_no_pron.txt || exit 1; cat $dir/oov_train_no_pron.txt $dir/g2p_prons_for_oov_train.txt $dir/ref_lexicon.txt | \ awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/train_counts.txt - | \ @@ -263,7 +269,7 @@ if [ $stage -le 3 ]; then # We prune the phonetic decoding generated prons relative to the largest count, by setting "min_prob", # and only leave prons who are not present in the reference lexicon / g2p-generated lexicon. - cat $dir/ref_lexicon.txt $dir/lexicon_g2p.txt > $dir/phonetic_decoding/filter_lexicon.txt + cat $dir/ref_lexicon.txt $dir/lexicon_g2p.txt | sort -u > $dir/phonetic_decoding/filter_lexicon.txt $cmd $dir/phonetic_decoding/log/prons_to_lexicon.log steps/dict/prons_to_lexicon.py \ --min-prob=$min_prob --filter-lexicon=$dir/phonetic_decoding/filter_lexicon.txt \ @@ -295,7 +301,7 @@ if [ $stage -le 4 ]; then # Generate lattices for the acoustic training data with the combined lexicon. if $retrain_src_mdl; then mdl_dir=$dir/${src_mdl_dir}_retrained; else mdl_dir=$src_mdl_dir; fi - steps/align_fmllr_lats.sh --cmd "$decode_cmd" --nj $nj \ + steps/align_fmllr_lats.sh --acoustic-scale 0.05 --cmd "$decode_cmd" --nj $nj \ $data $dir/lang_combined_iter1 $mdl_dir $dir/lats_iter1 || exit 1; # Get arc level information from the lattice. @@ -321,13 +327,10 @@ if [ $stage -le 5 ]; then rm $dir/dict_combined_iter2/lexiconp.txt $dir/dict_combined_iter2/lexicon.txt 2>/dev/null # Prune away pronunciations which have low acoustic evidence from the first pass of lattice alignment. - $cmd $dir/lats_iter1/log/prune_pron_candidates.log steps/dict/internal/prune_pron_candidates.py $dir/lats_iter1/pron_stats.txt $dir/ref_lexicon.txt $dir/pruned_prons.txt - - awk 'NR==FNR{a[$0] = 1; next} (!($0 in a))' $dir/pruned_prons.txt $dir/lexicon_phonetic_decoding.txt \ - > $dir/lexicon_phonetic_decoding_pruned.txt - - awk 'NR==FNR{a[$0] = 1; next} (!($0 in a))' $dir/pruned_prons.txt $dir/lexicon_g2p.txt \ - > $dir/lexicon_g2p_pruned.txt \ + $cmd $dir/lats_iter1/log/prune_pron_candidates.log steps/dict/internal/prune_pron_candidates.py \ + --variant-counts-ratio $variant_counts_ratio \ + $dir/lats_iter1/pron_stats.txt $dir/lexicon_phonetic_decoding_pruned.txt $dir/lexiconp_g2p.txt $dir/ref_lexicon.txt \ + $dir/lexicon_phonetic_decoding_pruned.txt $dir/lexicon_g2p_pruned.txt # Filter out words which don't appear in the acoustic training data cat $dir/lexicon_phonetic_decoding_pruned.txt $dir/lexicon_g2p_pruned.txt \ @@ -402,7 +405,7 @@ if [ $stage -le 7 ]; then # target vocab. We'll just assign to them pronunciations from lexicon_g2p, if any. cat $dir/lats_iter2/out_of_ref_vocab_prons_learned.txt $dir/ref_lexicon.txt | \ awk 'NR==FNR{a[$1] = 1; next} !($1 in a)' - \ - $dir/target_vocab.txt | sort | uniq > $dir/oov_no_acoustics.txt + $dir/target_vocab.txt | sort | uniq > $dir/oov_no_acoustics.txt || exit 1; awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/oov_no_acoustics.txt \ $dir/lexicon_g2p.txt > $dir/g2p_prons_for_oov_no_acoustics.txt @@ -426,5 +429,5 @@ if [ $stage -le 8 ]; then echo " ... sort -u \> $dest_dict/lexicon.txt to re-produce the final learned lexicon." 
cp $dir/lats_iter2/ref_lexicon_edits.txt $dest_dict/lexicon_edits.txt 2>/dev/null steps/dict/apply_lexicon_edits.py $dest_dict/lexicon0.txt $dir/lats_iter2/ref_lexicon_edits.txt - | \ - sort | uniq > $dest_dict/lexicon.txt + sort | uniq > $dest_dict/lexicon.txt || exit 1; fi diff --git a/egs/wsj/s5/steps/dict/learn_lexicon_greedy.sh b/egs/wsj/s5/steps/dict/learn_lexicon_greedy.sh new file mode 100755 index 00000000000..56e85f20d62 --- /dev/null +++ b/egs/wsj/s5/steps/dict/learn_lexicon_greedy.sh @@ -0,0 +1,546 @@ +#! /bin/bash + +# Copyright 2018 Xiaohui Zhang +# Apache 2.0 + +# This recipe has inputs and outputs similar to steps/dict/learn_lexicon_bayesian.sh. +# The major difference is, instead of using a Bayesian framework for +# pronunciation selection, we use a likelihood-reduction based greedy +# pronunciation selection framework presented in the paper: +# "Acoustic data-driven lexicon learning based on a greedy pronunciation " +# "selection framework, by X. Zhang, V. Manohar, D. Povey and S. Khudanpur," +# "Interspeech 2017." + +# This script demonstrates how to expand an existing lexicon using a combination +# of acoustic evidence and G2P to learn a lexicon that covers words in a target +# vocab, and agrees sufficiently with the acoustics. The basic idea is to +# run phonetic decoding on acoustic training data using an existing +# acoustic model (possibly re-trained using a G2P-expanded lexicon) to get +# alternative pronunciations for words in training data. Then we combine three +# exclusive sources of pronunciations: the reference lexicon (supposedly +# hand-derived), phonetic decoding, and G2P (optional) into one lexicon and then run +# lattice alignment on the same data, to collect acoustic evidence (soft +# counts) of all pronunciations. Based on these statistics, we use a greedy +# framework (see steps/dict/select_prons_greedy.py for details) to select an +# informative subset of pronunciations for each word with acoustic evidence. +# Two important parameters are alpha and beta. Basically, the three dimensions of alpha +# and beta correspond to three pronunciation sources: phonetic-decoding, G2P and +# the reference lexicon, and the larger a value is, the more aggressively we'll +# prune pronunciations from that source. The valid range of each dimension is [0, 1] +# for alpha (0 means we never prune prons from that source) and [0, 100] for beta. +# The output of steps/dict/select_prons_greedy.py is a learned lexicon whose vocab +# matches the user-specified target-vocab, and two intermediate outputs which were +# used to generate the learned lexicon: an edits file which records the recommended +# changes to all in-ref-vocab words' prons, and a half-learned lexicon +# ($dest_dict/lexicon0.txt) where all in-ref-vocab words' prons were untouched +# (on top of which we apply the edits file to produce the final learned lexicon). +# The user can always modify the edits file manually and then re-apply it on the +# half-learned lexicon using steps/dict/apply_lexicon_edits.py to produce the +# final learned lexicon. See the last stage in this script for details. + +stage=0 +# Begin configuration section. +cmd=run.pl +nj= +stage=0 +oov_symbol= +lexiconp_g2p= +min_prob=0.3 +variant_counts_ratio=8 +variant_counts_no_acoustics=1 +alpha="0,0,0" +beta="0,0,0" +delta=0.0000001 +num_gauss= +num_leaves= +retrain_src_mdl=true +cleanup=true +nj_select_prons=200 +learn_iv_prons=false # whether we want to learn the prons of IV words (w.r.t. ref_vocab), + +# End configuration section. + +. ./path.sh +. 
utils/parse_options.sh + +if [ $# -lt 6 ] || [ $# -gt 7 ]; then + echo "Usage: $0 [options] \\" + echo " ." + echo " This script does lexicon expansion using a combination of acoustic" + echo " evidence and G2P to produce a lexicon that covers words of a target vocab:" + echo "" + echo "Arguments:" + echo " The dir which contains the reference lexicon (most probably hand-derived)" + echo " we want to expand/improve, and nonsilence_phones.txt,.etc which we need " + echo " for building new dict dirs." + echo " The vocabulary we want the final learned lexicon to cover (one word per line)." + echo " acoustic training data we use to get alternative" + echo " pronunciations and collet acoustic evidence." + echo " The dir containing an SAT-GMM acoustic model (we optionaly we re-train it" + echo " using G2P expanded lexicon) to do phonetic decoding (to get alternative" + echo " pronunciations) and lattice-alignment (to collect acoustic evidence for" + echo " evaluating all prounciations)" + echo " The reference lang dir which we use to get non-scored-words" + echo " like for building new dict dirs" + echo " The dict dir where we put the final learned lexicon, whose vocab" + echo " matches ." + echo " The dir which contains all the intermediate outputs of this script." + echo "" + echo "Note: and the vocab of don't have to match. For words" + echo " who are in but not seen in , their pronunciations" + echo " will be given by G2P at the end." + echo "" + echo "e.g. $0 data/local/dict data/local/lm/librispeech-vocab.txt data/train \\" + echo " exp/tri3 data/lang data/local/dict_learned" + echo "Options:" + echo " --stage # stage to run from, to enable resuming from partially" + echo " # completed run (default: 0)" + echo " --cmd '$cmd' # command to submit jobs with (e.g. run.pl, queue.pl)" + echo " --nj # number of parallel jobs" + echo " --oov-symbol '$oov_symbol' # oov symbol, like ." + echo " --lexiconp-g2p # a lexicon (with prob in the second column) file containing g2p generated" + echo " # pronunciations, for words in acoustic training data / target vocabulary. It's optional." + echo " --min-prob # The cut-off parameter used to select pronunciation candidates from phonetic" + echo " # decoding. We remove pronunciations with probabilities less than this value" + echo " # after normalizing the probs s.t. the max-prob is 1.0 for each word." + echo " --variant-counts-ratio # This ratio parameter determines the maximum number of pronunciation" + echo " # candidates we will keep for each word, after pruning according to lattice statistics from" + echo " # the first iteration of lattice generation. See steps/dict/internal/prune_pron_candidates.py" + echo " # for details." + echo " --variant-counts-no-acoustics # how many g2p-prons per word we want to include for each words unseen in acoustic training data." + echo " --alpha ,, # scaling factors used in the greedy pronunciation selection framework, " + echo " # see steps/dict/select_prons_greedy.py for details." + echo " --beta ,, # smoothing factors used in the greedy pronunciation selection framework, " + echo " # see steps/dict/select_prons_greedy.py for details." + echo " --delta # a floor value used in the greedy pronunciation selection framework, " + echo " # see steps/dict/select_prons_greedy.py for details." + echo " --num-gauss # number of gaussians for the re-trained SAT model (on top of )." + echo " --num-leaves # number of leaves for the re-trained SAT model (on top of )." 
+ echo " --retrain-src-mdl # true if you want to re-train the src_mdl before phone decoding (default false)." + exit 1 +fi + +echo "$0 $@" # Print the command line for logging + +ref_dict=$1 +target_vocab=$2 +data=$3 +src_mdl_dir=$4 +ref_lang=$5 +dest_dict=$6 + +if [ -z "$oov_symbol" ]; then + echo "$0: the --oov-symbol option is required." + exit 1 +fi + +if [ $# -gt 6 ]; then + dir=$7 # Most intermediate outputs will be put here. +else + dir=${src_mdl_dir}_lex_learn_work +fi + +mkdir -p $dir +if [ $stage -le 0 ]; then + echo "$0: Some preparatory work." + # Get the word counts of training data. + awk '{for (n=2;n<=NF;n++) counts[$n]++;} END{for (w in counts) printf "%s %d\n",w, counts[w];}' \ + $data/text | sort > $dir/train_counts.txt + + # Get the non-scored entries and exclude them from the reference lexicon/vocab, and target_vocab. + steps/cleanup/internal/get_non_scored_words.py $ref_lang > $dir/non_scored_words + awk 'NR==FNR{a[$1] = 1; next} {if($1 in a) print $0}' $dir/non_scored_words \ + $ref_dict/lexicon.txt > $dir/non_scored_entries + + # Remove non-scored-words from the reference lexicon. + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' $dir/non_scored_words \ + $ref_dict/lexicon.txt | tr -s '\t' ' ' | awk '$1=$1' > $dir/ref_lexicon.txt + + cat $dir/ref_lexicon.txt | awk '{print $1}' | sort | uniq > $dir/ref_vocab.txt + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' $dir/non_scored_words \ + $target_vocab | sort | uniq > $dir/target_vocab.txt + + # From the reference lexicon, we estimate the target_num_prons_per_word as, + # round(avg. # prons per word in the reference lexicon). This'll be used as + # the upper bound of # pron variants per word when we apply G2P or select prons to + # construct the learned lexicon in later stages. + python -c 'import sys; import math; print int(round(float(sys.argv[1])/float(sys.argv[2])))' \ + `wc -l $dir/ref_lexicon.txt | awk '{print $1}'` `wc -l $dir/ref_vocab.txt | awk '{print $1}'` \ + > $dir/target_num_prons_per_word || exit 1; + + if [ -z $lexiconp_g2p ]; then + # create an empty list of g2p generated prons, if it's not given. + touch $dir/lexicon_g2p.txt + touch $dir/lexiconp_g2p.txt + else + # Exchange the 1st column (word) and 2nd column (prob) and remove pronunciations + # which are already in the reference lexicon. + cat $lexiconp_g2p | awk '{a=$1;b=$2; $1="";$2="";print b" "a$0}' | \ + awk 'NR==FNR{a[$0] = 1; next} {w=$2;for (n=3;n<=NF;n++) w=w" "$n; if(!(w in a)) print $0}' \ + $dir/ref_lexicon.txt - > $dir/lexiconp_g2p.txt 2>/dev/null + + # make a copy where we remove the first column (probabilities). + cat $dir/lexiconp_g2p.txt | cut -f1,3- > $dir/lexicon_g2p.txt 2>/dev/null + fi + variant_counts=`cat $dir/target_num_prons_per_word` || exit 1; + $cmd $dir/log/prune_g2p_lexicon.log steps/dict/prons_to_lexicon.py \ + --top-N=$variant_counts $dir/lexiconp_g2p.txt \ + $dir/lexicon_g2p_variant_counts${variant_counts}.txt || exit 1; +fi + +if [ $stage -le 1 ] && $retrain_src_mdl; then + echo "$0: Expand the reference lexicon to cover all words in the target vocab. and then" + echo " ... re-train the source acoustic model for phonetic decoding. " + mkdir -p $dir/dict_expanded_target_vocab + cp $ref_dict/{extra_questions.txt,optional_silence.txt,nonsilence_phones.txt,silence_phones.txt} \ + $dir/dict_expanded_target_vocab 2>/dev/null + rm $dir/dict_expanded_target_vocab/lexiconp.txt $dir/dict_expanded_target_vocab/lexicon.txt 2>/dev/null + + # Get the oov words list (w.r.t ref vocab) which are in the target vocab. 
+ awk 'NR==FNR{a[$1] = 1; next} !($1 in a)' $dir/ref_lexicon.txt \ + $dir/target_vocab.txt | sort | uniq > $dir/oov_target_vocab.txt + + # Assign pronunciations from lexicon_g2p.txt to oov_target_vocab. For words which + # cannot be found in lexicon_g2p.txt, we simply ignore them. + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/oov_target_vocab.txt \ + $dir/lexicon_g2p.txt > $dir/lexicon_g2p_oov_target_vocab.txt + + cat $dir/lexicon_g2p_oov_target_vocab.txt $dir/ref_lexicon.txt | \ + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/target_vocab.txt - | \ + cat $dir/non_scored_entries - | + sort | uniq > $dir/dict_expanded_target_vocab/lexicon.txt + + utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt $dir/dict_expanded_target_vocab \ + $oov_symbol $dir/lang_expanded_target_vocab_tmp $dir/lang_expanded_target_vocab || exit 1; + + # Align the acoustic training data using the given src_mdl_dir. + alidir=${src_mdl_dir}_ali_$(basename $data) + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $data $dir/lang_expanded_target_vocab $src_mdl_dir $alidir || exit 1; + + # Train another SAT system on the given data and put it in $dir/${src_mdl_dir}_retrained + # this model will be used for phonetic decoding and lattice alignment later on. + if [ -z $num_leaves ] || [ -z $num_gauss ] ; then + echo "num_leaves and num_gauss need to be specified." && exit 1; + fi + steps/train_sat.sh --cmd "$train_cmd" $num_leaves $num_gauss \ + $data $dir/lang_expanded_target_vocab $alidir $dir/${src_mdl_dir}_retrained || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: Expand the reference lexicon to cover all words seen in," + echo " ... acoustic training data, and prepare corresponding dict and lang directories." + echo " ... This is needed when generate pron candidates from phonetic decoding." + mkdir -p $dir/dict_expanded_train + cp $ref_dict/{extra_questions.txt,optional_silence.txt,nonsilence_phones.txt,silence_phones.txt} \ + $dir/dict_expanded_train 2>/dev/null + rm $dir/dict_expanded_train/lexiconp.txt $dir/dict_expanded_train/lexicon.txt 2>/dev/null + + # Get the oov words list (w.r.t ref vocab) which are in training data. + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/ref_lexicon.txt \ + $dir/train_counts.txt | awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $0}' \ + $dir/non_scored_words - | sort > $dir/oov_train.txt || exit 1; + + awk 'NR==FNR{a[$1] = 1; next} {if(($1 in a)) b+=$2; else c+=$2} END{print c/(b+c)}' \ + $dir/ref_vocab.txt $dir/train_counts.txt > $dir/train_oov_rate || exit 1; + + echo "OOV rate (w.r.t. the reference lexicon) of the acoustic training data is:" + cat $dir/train_oov_rate + + # Assign pronunciations from lexicon_g2p to oov_train. For words which + # cannot be found in lexicon_g2p, we simply assign oov_symbol's pronunciaiton + # (like NSN) to them, in order to get phonetic decoding pron candidates for them later on. + variant_counts=`cat $dir/target_num_prons_per_word` || exit 1; + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/oov_train.txt \ + $dir/lexicon_g2p_variant_counts${variant_counts}.txt > $dir/g2p_prons_for_oov_train.txt || exit 1; + + # Get the pronunciation of oov_symbol. + oov_pron=`cat $dir/non_scored_entries | grep $oov_symbol | awk '{print $2}'` + # For oov words in training data for which we don't even have G2P pron candidates, + # we simply assign them the pronunciation of the oov symbol (like ), + # so that we can get pronunciations for them from phonetic decoding. 
+ awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/g2p_prons_for_oov_train.txt \ + $dir/oov_train.txt | awk -v op="$oov_pron" '{print $0" "op}' > $dir/oov_train_no_pron.txt || exit 1; + + cat $dir/oov_train_no_pron.txt $dir/g2p_prons_for_oov_train.txt $dir/ref_lexicon.txt | \ + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/train_counts.txt - | \ + cat - $dir/non_scored_entries | \ + sort | uniq > $dir/dict_expanded_train/lexicon.txt || exit 1; + + utils/prepare_lang.sh $dir/dict_expanded_train $oov_symbol \ + $dir/lang_expanded_train_tmp $dir/lang_expanded_train || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: Generate pronunciation candidates from phonetic decoding on acoustic training data.." + if $retrain_src_mdl; then mdl_dir=$dir/${src_mdl_dir}_retrained; else mdl_dir=$src_mdl_dir; fi + steps/cleanup/debug_lexicon.sh --nj $nj \ + --cmd "$decode_cmd" $data $dir/lang_expanded_train \ + $mdl_dir $dir/dict_expanded_train/lexicon.txt $dir/phonetic_decoding || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: Combine the reference lexicon and pronunciations from phone-decoding/G2P into one" + echo " ... lexicon, and run lattice alignment using this lexicon on acoustic training data" + echo " ... to collect acoustic evidence." + # We first prune the phonetic decoding generated prons relative to the largest count, by setting "min_prob", + # and only leave prons who are not present in the reference lexicon / g2p-generated lexicon. + cat $dir/ref_lexicon.txt $dir/lexicon_g2p.txt | sort -u > $dir/phonetic_decoding/filter_lexicon.txt + + $cmd $dir/phonetic_decoding/log/prons_to_lexicon.log steps/dict/prons_to_lexicon.py \ + --min-prob=$min_prob --filter-lexicon=$dir/phonetic_decoding/filter_lexicon.txt \ + $dir/phonetic_decoding/prons.txt $dir/lexicon_pd_with_eps.txt + + # We abandon phonetic-decoding candidates for infrequent words. + awk '{if($2 < 3) print $1}' $dir/train_counts.txt > $dir/pd_candidates_to_exclude.txt + awk 'NR==FNR{a[$1] = $2; next} {if(a[$1]<10) print $1}' $dir/train_counts.txt \ + $dir/oov_train_no_pron.txt >> $dir/pd_candidates_to_exclude.txt + + if [ -s $dir/pd_candidates_to_exclude.txt ]; then + cat $dir/lexicon_pd_with_eps.txt | grep -vP "|||\[.*\]" | \ + awk 'NR==FNR{a[$0] = 1; next} {if(!($1 in a)) print $0}' $dir/pd_candidates_to_exclude.txt - | \ + sort | uniq > $dir/lexicon_pd.txt || exit 1; + else + cat $dir/lexicon_pd_with_eps.txt | grep -vP "|||\[.*\]" | \ + sort | uniq > $dir/lexicon_pd.txt || exit 1; + fi + + # Combine the reference lexicon, pronunciations from G2P and phonetic decoding into one lexicon. + mkdir -p $dir/dict_combined_iter1 + cp $ref_dict/{extra_questions.txt,optional_silence.txt,nonsilence_phones.txt,silence_phones.txt} \ + $dir/dict_combined_iter1/ 2>/dev/null + rm $dir/dict_combined_iter1/lexiconp.txt $dir/dict_combined_iter1/lexicon.txt 2>/dev/null + + # Filter out words which don't appear in the acoustic training data + cat $dir/lexicon_pd.txt $dir/lexicon_g2p.txt \ + $dir/ref_lexicon.txt | tr -s '\t' ' ' | \ + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/train_counts.txt - | \ + cat $dir/non_scored_entries - | \ + sort | uniq > $dir/dict_combined_iter1/lexicon.txt + + utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt \ + $dir/dict_combined_iter1 $oov_symbol \ + $dir/lang_combined_iter1_tmp $dir/lang_combined_iter1 || exit 1; + + # Generate lattices for the acoustic training data with the combined lexicon. 
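A minimal sketch of the min_prob pruning applied at the start of this stage, assuming per-word soft counts are first rescaled so the best pronunciation gets 1.0 and anything below min_prob is then dropped; the function name and toy counts are illustrative, not part of the patch:

def prune_by_min_prob(counts, min_prob=0.3):
    # counts: pron -> soft count for a single word; scale so the best pron is 1.0,
    # then drop prons whose scaled value falls below min_prob.
    best = max(counts.values())
    return dict((p, c / best) for p, c in counts.items() if c / best >= min_prob)

print(prune_by_min_prob({'K AE T': 9.0, 'K AH T': 3.0, 'G AE T': 0.5}))
# keeps 'K AE T' (1.0) and 'K AH T' (0.33...); 'G AE T' (0.06) is pruned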
+ if $retrain_src_mdl; then mdl_dir=$dir/${src_mdl_dir}_retrained; else mdl_dir=$src_mdl_dir; fi + + # Get the vocab for words for which we want to learn pronunciations. + if $learn_iv_prons; then + # If we want to learn the prons of IV words (w.r.t. ref_vocab), the learn_vocab is just the intersection of + # target_vocab and the vocab of words seen in acoustic training data (first col. of train_counts.txt) + awk 'NR==FNR{a[$1] = 1; next} {if($1 in a) print $1}' $dir/target_vocab.txt $dir/train_counts.txt \ + > $dir/learn_vocab.txt + else + # Exclude words from the ref_vocab if we don't want to learn the pronunciations of IV words. + awk 'NR==FNR{a[$1] = 1; next} {if($1 in a) print $1}' $dir/target_vocab.txt $dir/train_counts.txt | \ + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/ref_vocab.txt - > $dir/learn_vocab.txt + fi + + # In order to get finer lattice stats of alternative prons, we want to make lattices deeper. + # To speed up lattice generation, we use a ctm to create sub-utterances and a sub-segmentation + # for each instance of a word within learn_vocab (or a string of consecutive words within learn_vocab), + # including a single out-of-learn-vocab word at the boundary if present. + mkdir -p $dir/resegmentation + steps/dict/internal/get_subsegments.py $dir/phonetic_decoding/word.ctm $dir/learn_vocab.txt \ + $dir/resegmentation/subsegments $dir/resegmentation/text || exit 1; + utils/data/subsegment_data_dir.sh $data $dir/resegmentation/subsegments $dir/resegmentation/text \ + $dir/resegmentation/data || exit 1; + steps/compute_cmvn_stats.sh $dir/resegmentation/data || exit 1; + + steps/align_fmllr_lats.sh --beam 20 --retry-beam 50 --final-beam 30 --acoustic-scale 0.05 --cmd "$decode_cmd" --nj $nj \ + $dir/resegmentation/data $dir/lang_combined_iter1 $mdl_dir $dir/lats_iter1 || exit 1; + + # Get arc level information from the lattice. + $cmd JOB=1:$nj $dir/lats_iter1/log/get_arc_info.JOB.log \ + lattice-align-words $dir/lang_combined_iter1/phones/word_boundary.int \ + $dir/lats_iter1/final.mdl \ + "ark:gunzip -c $dir/lats_iter1/lat.JOB.gz |" ark:- \| \ + lattice-arc-post --acoustic-scale=0.1 $dir/lats_iter1/final.mdl ark:- - \| \ + utils/int2sym.pl -f 5 $dir/lang_combined_iter1/words.txt \| \ + utils/int2sym.pl -f 6- $dir/lang_combined_iter1/phones.txt '>' \ + $dir/lats_iter1/arc_info_sym.JOB.txt || exit 1; + + # Compute soft counts (pron_stats) of every particular word-pronunciation pair by + # summing up arc level information over all utterances. We'll use this to prune + # pronunciation candidates before the next iteration of lattice generation. + cat $dir/lats_iter1/arc_info_sym.*.txt | steps/dict/get_pron_stats.py - \ + $dir/phonetic_decoding/phone_map.txt $dir/lats_iter1/pron_stats.txt || exit 1; + + # Accumlate utterance-level pronunciation posteriors (into arc_stats) by summing up + # posteriors of arcs representing the same word & pronunciation and starting + # from roughly the same location. See steps/dict/internal/sum_arc_info.py for details. + for i in `seq 1 $nj`;do + cat $dir/lats_iter1/arc_info_sym.${i}.txt | sort -n -k1 -k2 -k3r | \ + steps/dict/internal/sum_arc_info.py - $dir/phonetic_decoding/phone_map.txt $dir/lats_iter1/arc_info_summed.${i}.txt + done + cat $dir/lats_iter1/arc_info_summed.*.txt | sort -k1 -k2 > $dir/lats_iter1/arc_stats.txt + + # Prune the phonetic_decoding lexicon so that any pronunciation that only has non-zero posterior at one word example will be removed. + # The pruned lexicon is put in $dir/lats_iter1. 
After further pruning in the next stage it'll be put back to $dir. + awk 'NR==FNR{w=$1;for (n=5;n<=NF;n++) w=w" "$n;a[w]+=1;next} {if($0 in a && a[$0]>1) print $0}' \ + $dir/lats_iter1/arc_stats.txt $dir/lexicon_pd.txt > $dir/lats_iter1/lexicon_pd_pruned.txt +fi + +# Here we re-generate lattices (with a wider beam and a pruned combined lexicon) and re-collect pronunciation statistics +if [ $stage -le 5 ]; then + echo "$0: Prune the pronunciation candidates generated from G2P/phonetic decoding, and re-do lattice-alignment." + mkdir -p $dir/dict_combined_iter2 + cp $ref_dict/{extra_questions.txt,optional_silence.txt,nonsilence_phones.txt,silence_phones.txt} \ + $dir/dict_combined_iter2/ 2>/dev/null + rm $dir/dict_combined_iter2/lexiconp.txt $dir/dict_combined_iter2/lexicon.txt 2>/dev/null + + # Prune away pronunciations which have low acoustic evidence from the first pass of lattice generation. + $cmd $dir/lats_iter1/log/prune_pron_candidates.log steps/dict/internal/prune_pron_candidates.py \ + --variant-counts-ratio $variant_counts_ratio \ + $dir/lats_iter1/pron_stats.txt $dir/lats_iter1/lexicon_pd_pruned.txt $dir/lexiconp_g2p.txt $dir/ref_lexicon.txt \ + $dir/lexicon_pd_pruned.txt $dir/lexicon_g2p_pruned.txt + + # Filter out words which don't appear in the acoustic training data. + cat $dir/lexicon_pd_pruned.txt $dir/lexicon_g2p_pruned.txt \ + $dir/ref_lexicon.txt | tr -s '\t' ' ' | \ + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/train_counts.txt - | \ + cat $dir/non_scored_entries - | \ + sort | uniq > $dir/dict_combined_iter2/lexicon.txt + + utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt \ + $dir/dict_combined_iter2 $oov_symbol \ + $dir/lang_combined_iter2_tmp $dir/lang_combined_iter2 || exit 1; + + # Re-generate lattices with a wider beam, so that we'll get deeper lattices. + if $retrain_src_mdl; then mdl_dir=$dir/${src_mdl_dir}_retrained; else mdl_dir=$src_mdl_dir; fi + steps/align_fmllr_lats.sh --beam 30 --retry-beam 60 --final-beam 50 --acoustic-scale 0.05 --cmd "$decode_cmd" --nj $nj \ + $dir/resegmentation/data $dir/lang_combined_iter2 $mdl_dir $dir/lats_iter2 || exit 1; + + # Get arc level information from the lattice as we did in the last stage. + $cmd JOB=1:$nj $dir/lats_iter2/log/get_arc_info.JOB.log \ + lattice-align-words $dir/lang_combined_iter2/phones/word_boundary.int \ + $dir/lats_iter2/final.mdl \ + "ark:gunzip -c $dir/lats_iter2/lat.JOB.gz |" ark:- \| \ + lattice-arc-post --acoustic-scale=0.1 $dir/lats_iter2/final.mdl ark:- - \| \ + utils/int2sym.pl -f 5 $dir/lang_combined_iter2/words.txt \| \ + utils/int2sym.pl -f 6- $dir/lang_combined_iter2/phones.txt '>' \ + $dir/lats_iter2/arc_info_sym.JOB.txt || exit 1; + + # Compute soft counts (pron_stats) of every particular word-pronunciation pair as + # we did in the last stage. The stats will only be used as diagnostics. + cat $dir/lats_iter2/arc_info_sym.*.txt | steps/dict/get_pron_stats.py - \ + $dir/phonetic_decoding/phone_map.txt $dir/lats_iter2/pron_stats.txt || exit 1; + + # Accumlate utterance-level pronunciation posteriors as we did in the last stage. + for i in `seq 1 $nj`;do + cat $dir/lats_iter2/arc_info_sym.${i}.txt | sort -n -k1 -k2 -k3r | \ + steps/dict/internal/sum_arc_info.py - $dir/phonetic_decoding/phone_map.txt $dir/lats_iter2/arc_info_summed.${i}.txt + done + cat $dir/lats_iter2/arc_info_summed.*.txt | sort -k1 -k2 > $dir/lats_iter2/arc_stats.txt + + # The pron_stats are the acoustic evidence which the likelihood-reduction-based pronunciation + # selection procedure will be based on. 
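A toy illustration (not part of the patch) of what the arc-info summing step boils down to: posteriors of arcs carrying the same word and pronunciation within an utterance are summed, and then normalized per (word, utterance) so they sum to one:

from collections import defaultdict

# (word, utt, pron, arc posterior) tuples, as if read from the arc_info files
arcs = [('the', 'utt1', 'DH AH', 0.5), ('the', 'utt1', 'DH AH', 0.2),
        ('the', 'utt1', 'DH IY', 0.3)]
acc = defaultdict(float)
for word, utt, pron, post in arcs:
    acc[(word, utt, pron)] += post                 # sum posteriors per pron
totals = defaultdict(float)
for (word, utt, pron), c in acc.items():
    totals[(word, utt)] += c
posteriors = dict((k, c / totals[(k[0], k[1])]) for k, c in acc.items())
print(posteriors)   # 'DH AH' and 'DH IY' posteriors sum to 1 (roughly 0.7 and 0.3)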
+ # Split the utterance-level pronunciation posterior stats into $nj_select_prons pieces, + # so that the following pronunciation selection stage can be parallelized. + numsplit=$nj_select_prons + awk '{print $1"-"$2" "$1}' $dir/lats_iter2/arc_stats.txt > $dir/lats_iter2/utt2word + utt2words=$(for n in `seq $numsplit`; do echo $dir/lats_iter2/utt2word.$n; done) + utils/split_scp.pl --utt2spk=$dir/lats_iter2/utt2word $dir/lats_iter2/utt2word $utt2words || exit 1 + for n in `seq $numsplit`; do + (cat $dir/lats_iter2/utt2word.$n | awk '{$1=substr($1,length($2)+2);print $2" "$1}' - > $dir/lats_iter2/word2utt.$n + awk 'NR==FNR{a[$0] = 1; next} {b=$1" "$2; if(b in a) print $0}' $dir/lats_iter2/word2utt.$n \ + $dir/lats_iter2/arc_stats.txt > $dir/lats_iter2/arc_stats.${n}.txt + ) & + done + wait +fi + +if [ $stage -le 6 ]; then + echo "$0: Select pronunciations according to the acoustic evidence from lattice alignment." + # Given the acoustic evidence (soft-counts), we use a Bayesian framework to select pronunciations + # from three exclusive candidate sources: reference (hand-derived) lexicon, G2P and phonetic decoding. + # The posteriors for all candidate prons for all words are printed into pron_posteriors.txt + # For words which are out of the ref. vocab, the learned prons are written into out_of_ref_vocab_prons_learned.txt. + # Among them, for words without acoustic evidence, we just ignore them, even if pron candidates from G2P were provided). + # For words in the ref. vocab, we instead output a human readable & editable "edits" file called + # ref_lexicon_edits.txt, which records all proposed changes to the prons (if any). Also, a + # summary is printed into the log file. + + $cmd JOB=1:$nj_select_prons $dir/lats_iter2/log/generate_learned_lexicon.JOB.log \ + steps/dict/select_prons_greedy.py \ + --alpha=${alpha} --beta=${beta} \ + --delta=${delta} \ + $ref_dict/silence_phones.txt $dir/lats_iter2/arc_stats.JOB.txt $dir/train_counts.txt $dir/ref_lexicon.txt \ + $dir/lexicon_g2p_pruned.txt $dir/lexicon_pd_pruned.txt \ + $dir/lats_iter2/learned_lexicon.JOB.txt || exit 1; + + cat $dir/lats_iter2/learned_lexicon.*.txt > $dir/lats_iter2/learned_lexicon.txt + rm $dir/lats_iter2/learned_lexicon.*.txt + + $cmd $dir/lats_iter2/log/lexicon_learning_summary.log \ + steps/dict/merge_learned_lexicons.py \ + $dir/lats_iter2/arc_stats.txt $dir/train_counts.txt $dir/ref_lexicon.txt \ + $dir/lexicon_g2p_pruned.txt $dir/lexicon_pd_pruned.txt \ + $dir/lats_iter2/learned_lexicon.txt \ + $dir/lats_iter2/out_of_ref_vocab_prons_learned.txt $dir/lats_iter2/ref_lexicon_edits.txt || exit 1; + + cp $dir/lats_iter2/ref_lexicon_edits.txt $dir/lats_iter2/ref_lexicon_edits.txt + # Remove some stuff that takes up space and is unlikely to be useful later on. + if $cleanup; then + rm -r $dir/lats_iter*/{fsts*,lat*} 2>/dev/null + fi +fi + +if [ $stage -le 7 ]; then + echo "$0: Expand the learned lexicon further to cover words in target vocab that are." + echo " ... not seen in acoustic training data." + mkdir -p $dest_dict + cp $ref_dict/{extra_questions.txt,optional_silence.txt,nonsilence_phones.txt,silence_phones.txt} \ + $dest_dict 2>/dev/null + rm $dest_dict/lexiconp.txt $dest_dict/lexicon.txt 2>/dev/null + # Get the list of oov (w.r.t. ref vocab) without acoustic evidence, which are in the + # target vocab. We'll just assign to them pronunciations from lexicon_g2p, if any. 
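As a reading aid for the fallback applied below (the real script does this with awk): target-vocab words without acoustic evidence get their G2P prons when available, and otherwise the pronunciation of the oov symbol. All names and values here are toy examples:

lexicon_g2p = {'kaldi': ['K AA L D IY']}   # toy G2P lexicon
oov_pron = 'S P N'                         # toy pronunciation of the oov symbol
oov_no_acoustics = ['kaldi', 'xyzzy']
fallback = dict((w, lexicon_g2p.get(w, [oov_pron])) for w in oov_no_acoustics)
print(fallback)   # {'kaldi': ['K AA L D IY'], 'xyzzy': ['S P N']}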
+ cat $dir/lats_iter2/out_of_ref_vocab_prons_learned.txt $dir/ref_lexicon.txt | \ + awk 'NR==FNR{a[$1] = 1; next} !($1 in a)' - \ + $dir/target_vocab.txt | sort | uniq > $dir/oov_no_acoustics.txt || exit 1; + + variant_counts=$variant_counts_no_acoustics + + $cmd $dir/log/prune_g2p_lexicon.log steps/dict/prons_to_lexicon.py \ + --top-N=$variant_counts $dir/lexiconp_g2p.txt \ + $dir/lexicon_g2p_variant_counts${variant_counts}.txt || exit 1; + + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/oov_no_acoustics.txt \ + $dir/lexicon_g2p_variant_counts${variant_counts}.txt > $dir/g2p_prons_for_oov_no_acoustics.txt|| exit 1; + + # Get the pronunciation of oov_symbol. + oov_pron=`cat $dir/non_scored_entries | grep $oov_symbol | awk '{print $2}'` || exit 1; + # For oov words in target_vocab for which we don't even have G2P pron candidates, + # we simply assign them the pronunciation of the oov symbol (like ), + if [ -s $dir/g2p_prons_for_oov_no_acoustics.txt ]; then + awk 'NR==FNR{a[$1] = 1; next} {if(!($1 in a)) print $1}' $dir/g2p_prons_for_oov_no_acoustics.txt \ + $dir/oov_no_acoustics.txt | awk -v op="$oov_pron" '{print $0" "op}' > $dir/oov_target_vocab_no_pron.txt || exit 1; + else + awk -v op="$oov_pron" '{print $0" "op}' $dir/oov_no_acoustics.txt > $dir/oov_target_vocab_no_pron.txt || exit 1 + fi + + # We concatenate three lexicons togethers: G2P lexicon for oov words without acoustics, + # learned lexicon for oov words with acoustics, and the original reference lexicon (for + # this part, later one we'll apply recommended changes using steps/dict/apply_lexicon_edits.py + cat $dir/g2p_prons_for_oov_no_acoustics.txt $dir/lats_iter2/out_of_ref_vocab_prons_learned.txt \ + $dir/oov_target_vocab_no_pron.txt $dir/ref_lexicon.txt | tr -s '\t' ' ' | sort | uniq > $dest_dict/lexicon.temp + + awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/target_vocab.txt \ + $dest_dict/lexicon.temp | sort | uniq > $dest_dict/lexicon.nosil + + cat $dir/non_scored_entries $dest_dict/lexicon.nosil | sort | uniq >$dest_dict/lexicon0.txt +fi + +if [ $stage -le 8 ]; then + echo "$0: Apply the ref_lexicon_edits file to the reference lexicon." + echo " ... The user can inspect/modify the edits file and then re-run:" + echo " ... steps/dict/apply_lexicon_edits.py $dest_dict/lexicon0.txt $dir/lats_iter2/ref_lexicon_edits.txt - | \\" + echo " ... sort -u \> $dest_dict/lexicon.txt to re-produce the final learned lexicon." + cp $dir/lats_iter2/ref_lexicon_edits.txt $dest_dict/lexicon_edits.txt 2>/dev/null + steps/dict/apply_lexicon_edits.py $dest_dict/lexicon0.txt $dir/lats_iter2/ref_lexicon_edits.txt - | \ + sort | uniq > $dest_dict/lexicon.txt || exit 1; +fi + +echo "Lexicon learning ends successfully. Please refer to $dir/lats_iter2/log/lexicon_learning_summary.log" +echo " for a summary. The learned lexicon, whose vocab matches the target_vocab, is $dest_dict/lexicon.txt" diff --git a/egs/wsj/s5/steps/dict/merge_learned_lexicons.py b/egs/wsj/s5/steps/dict/merge_learned_lexicons.py new file mode 100755 index 00000000000..6df7eb7a744 --- /dev/null +++ b/egs/wsj/s5/steps/dict/merge_learned_lexicons.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python + +# Copyright 2018 Xiaohui Zhang +# Apache 2.0. + +from __future__ import print_function +from collections import defaultdict +import argparse +import sys +import math + +def GetArgs(): + parser = argparse.ArgumentParser( + description = "Convert a learned lexicon produced by steps/dict/select_prons_greedy.sh" + "into a lexicon for OOV words (w.r.t. ref. 
vocab) and a human-editable lexicon-edit file." + "for in-vocab words, and generate detailed summaries of the lexicon learning results" + "The inputs are a learned lexicon, an arc-stats file, and three source lexicons " + "(phonetic-decoding(PD)/G2P/ref). The outputs are: a learned lexicon for OOVs" + "(learned_lexicon_oov), and a lexicon_edits file (ref_lexicon_edits) containing" + "suggested modifications of prons, for in-vocab words.", + epilog = "See steps/dict/learn_lexicon_greedy.sh for example.") + parser.add_argument("arc_stats_file", metavar = "", type = str, + help = "File containing word-pronunciation statistics obtained from lattices; " + "each line must be ") + parser.add_argument("word_counts_file", metavar = "", type = str, + help = "File containing word counts in acoustic training data; " + "each line must be .") + parser.add_argument("ref_lexicon", metavar = "", type = str, + help = "The reference lexicon (most probably hand-derived)." + "Each line must be ") + parser.add_argument("g2p_lexicon", metavar = "", type = str, + help = "Candidate pronunciations from G2P results." + "Each line must be ") + parser.add_argument("pd_lexicon", metavar = "", type = str, + help = "Candidate pronunciations from phonetic decoding results." + "Each line must be ") + parser.add_argument("learned_lexicon", metavar = "", type = str, + help = "Learned lexicon." + "Each line must be ") + parser.add_argument("learned_lexicon_oov", metavar = "", type = str, + help = "Output file which is the learned lexicon for words out of the ref. vocab.") + parser.add_argument("ref_lexicon_edits", metavar = "", type = str, + help = "Output file containing human-readable & editable pronunciation info (and the" + "accept/reject decision made by our algorithm) for those words in ref. vocab," + "to which any change has been recommended. The info for each word is like:" + "------------ an 4086.0 --------------" + "R | Y | 2401.6 | AH N" + "R | Y | 640.8 | AE N" + "P | Y | 1035.5 | IH N" + "R(ef), P(hone-decoding) represent the pronunciation source" + "Y/N means the recommended decision of including this pron or not" + "and the numbers are soft counts accumulated from lattice-align-word outputs.
" + "See the function WriteEditsAndSummary for more details.") + + print (' '.join(sys.argv), file=sys.stderr) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if args.arc_stats_file == "-": + args.arc_stats_file_handle = sys.stdin + else: + args.arc_stats_file_handle = open(args.arc_stats_file) + args.word_counts_file_handle = open(args.word_counts_file) + args.ref_lexicon_handle = open(args.ref_lexicon) + args.g2p_lexicon_handle = open(args.g2p_lexicon) + args.pd_lexicon_handle = open(args.pd_lexicon) + args.learned_lexicon_handle = open(args.learned_lexicon) + args.learned_lexicon_oov_handle = open(args.learned_lexicon_oov, "w") + args.ref_lexicon_edits_handle = open(args.ref_lexicon_edits, "w") + + return args + +def ReadArcStats(arc_stats_file_handle): + stats = defaultdict(lambda : defaultdict(dict)) + stats_summed = defaultdict(float) + for line in arc_stats_file_handle.readlines(): + splits = line.strip().split() + + if (len(splits) == 0): + continue + + if (len(splits) < 5): + raise Exception('Invalid format of line ' + line + + ' in ' + arc_stats_file) + utt = splits[1] + start_frame = int(splits[2]) + word = splits[0] + count = float(splits[3]) + phones = splits[4:] + phones = ' '.join(phones) + stats[word][(utt, start_frame)][phones] = count + stats_summed[(word, phones)] += count + return stats, stats_summed + +def ReadWordCounts(word_counts_file_handle): + counts = {} + for line in word_counts_file_handle.readlines(): + splits = line.strip().split() + if len(splits) < 2: + raise Exception('Invalid format of line ' + line + + ' in counts file.') + word = splits[0] + count = int(splits[1]) + counts[word] = count + return counts + +def ReadLexicon(args, lexicon_file_handle, counts): + # we're skipping any word not in counts (not seen in training data), + # cause we're only learning prons for words who have acoustic examples. + lexicon = defaultdict(set) + for line in lexicon_file_handle.readlines(): + splits = line.strip().split() + if len(splits) == 0: + continue + if len(splits) < 2: + raise Exception('Invalid format of line ' + line + + ' in lexicon file.') + word = splits[0] + if word not in counts: + continue + phones = ' '.join(splits[1:]) + lexicon[word].add(phones) + return lexicon + +def WriteEditsAndSummary(args, learned_lexicon, ref_lexicon, pd_lexicon, g2p_lexicon, counts, stats, stats_summed): + # Note that learned_lexicon and ref_lexicon are dicts of sets of prons, while the other two lexicons are sets of (word, pron) pairs. + threshold = 2 + words = [defaultdict(set) for i in range(4)] # "words" contains four bins, where we + # classify each word into, according to whether it's count > threshold, + # and whether it's OOVs w.r.t the reference lexicon. + + src = {} + print("# Note: This file contains pronunciation info for words who have candidate " + "prons from G2P/phonetic-decoding accepted in the learned lexicon" + ", sorted by their counts in acoustic training data, " + ,file=args.ref_lexicon_edits_handle) + print("# 1st Col: source of the candidate pron: G(2P) / P(hone-decoding) / R(eference)." + ,file=args.ref_lexicon_edits_handle) + print("# 2nd Col: accepted or not in the learned lexicon (Y/N).", file=args.ref_lexicon_edits_handle) + print("# 3rd Col: soft counts from lattice-alignment (not augmented by prior-counts)." + ,file=args.ref_lexicon_edits_handle) + print("# 4th Col: the pronunciation cadidate.", file=args.ref_lexicon_edits_handle) + + # words which are to be printed into the edits file. 
+ words_to_edit = [] + num_prons_tot = 0 + for word in learned_lexicon: + num_prons_tot += len(learned_lexicon[word]) + count = len(stats[word]) # This count could be smaller than the count read from the dict "counts", + # since in each sub-utterance, multiple occurences (which is rare) of the same word are compressed into one. + # We use this count here so that in the edit-file, soft counts for each word sum up to one. + flags = ['0' for i in range(3)] # "flags" contains three binary indicators, + # indicating where this word's pronunciations come from. + for pron in learned_lexicon[word]: + if word in pd_lexicon and pron in pd_lexicon[word]: + flags[0] = '1' + src[(word, pron)] = 'P' + elif word in ref_lexicon and pron in ref_lexicon[word]: + flags[1] = '1' + src[(word, pron)] = 'R' + elif word in g2p_lexicon and pron in g2p_lexicon[word]: + flags[2] = '1' + src[(word, pron)] = 'G' + if word in ref_lexicon: + all_ref_prons_accepted = True + for pron in ref_lexicon[word]: + if pron not in learned_lexicon[word]: + all_ref_prons_accepted = False + break + if not all_ref_prons_accepted or flags[0] == '1' or flags[2] == '1': + words_to_edit.append((word, len(stats[word]))) + if count > threshold: + words[0][flags[0] + flags[1] + flags[2]].add(word) + else: + words[1][flags[0] + flags[1] + flags[2]].add(word) + else: + if count > threshold: + words[2][flags[0] + flags[2]].add(word) + else: + words[3][flags[0] + flags[2]].add(word) + + words_to_edit_sorted = sorted(words_to_edit, key=lambda entry: entry[1], reverse=True) + for word, count in words_to_edit_sorted: + print("------------",word, "%2.1f" % count, "--------------", file=args.ref_lexicon_edits_handle) + learned_prons = [] + for pron in learned_lexicon[word]: + learned_prons.append((src[(word, pron)], 'Y', stats_summed[(word, pron)], pron)) + for pron in ref_lexicon[word]: + if pron not in learned_lexicon[word]: + learned_prons.append(('R', 'N', stats_summed[(word, pron)], pron)) + learned_prons_sorted = sorted(learned_prons, key=lambda item: item[2], reverse=True) + for item in learned_prons_sorted: + print('{} | {} | {:.2f} | {}'.format(item[0], item[1], item[2], item[3]), file=args.ref_lexicon_edits_handle) + + num_oovs_with_acoustic_evidence = len(set(learned_lexicon.keys()).difference(set(ref_lexicon.keys()))) + num_oovs = len(set(counts.keys()).difference(set(ref_lexicon.keys()))) + num_ivs = len(learned_lexicon) - num_oovs_with_acoustic_evidence + print("Average num. 
prons per word in the learned lexicon is {}".format(float(num_prons_tot)/float(len(learned_lexicon))), file=sys.stderr) + # print("Here are the words whose reference pron candidates were all declined", words[0]['100'], file=sys.stderr) + print("-------------------------------------------------Summary------------------------------------------", file=sys.stderr) + print("We have acoustic evidence for {} out of {} in-vocab (w.r.t the reference lexicon) words from the acoustic training data.".format(num_ivs, len(ref_lexicon)), file=sys.stderr) + print(" Among those frequent words whose counts in the training text > ", threshold, ":", file=sys.stderr) + num_freq_ivs_from_all_sources = len(words[0]['111']) + len(words[0]['110']) + len(words[0]['011']) + num_freq_ivs_from_g2p_or_phonetic_decoding = len(words[0]['101']) + len(words[0]['001']) + len(words[0]['100']) + num_freq_ivs_from_ref = len(words[0]['010']) + num_infreq_ivs_from_all_sources = len(words[1]['111']) + len(words[1]['110']) + len(words[1]['011']) + num_infreq_ivs_from_g2p_or_phonetic_decoding = len(words[1]['101']) + len(words[1]['001']) + len(words[1]['100']) + num_infreq_ivs_from_ref = len(words[1]['010']) + print(' {} words\' selected prons came from the reference lexicon, G2P/phonetic-decoding.'.format(num_freq_ivs_from_all_sources), file=sys.stderr) + print(' {} words\' selected prons come from G2P/phonetic-decoding-generated.'.format(num_freq_ivs_from_g2p_or_phonetic_decoding), file=sys.stderr) + print(' {} words\' selected prons came from the reference lexicon only.'.format(num_freq_ivs_from_ref), file=sys.stderr) + print(' For those words whose counts in the training text <= {}:'.format(threshold), file=sys.stderr) + print(' {} words\' selected prons came from the reference lexicon, G2P/phonetic-decoding.'.format(num_infreq_ivs_from_all_sources), file=sys.stderr) + print(' {} words\' selected prons come from G2P/phonetic-decoding-generated.'.format(num_infreq_ivs_from_g2p_or_phonetic_decoding), file=sys.stderr) + print(' {} words\' selected prons came from the reference lexicon only.'.format(num_infreq_ivs_from_ref), file=sys.stderr) + print("---------------------------------------------------------------------------------------------------", file=sys.stderr) + num_freq_oovs_from_both_sources = len(words[2]['11']) + num_freq_oovs_from_phonetic_decoding = len(words[2]['10']) + num_freq_oovs_from_g2p = len(words[2]['01']) + num_infreq_oovs_from_both_sources = len(words[3]['11']) + num_infreq_oovs_from_phonetic_decoding = len(words[3]['10']) + num_infreq_oovs_from_g2p = len(words[3]['01']) + print('We have acoustic evidence for {} out of {} OOV (w.r.t the reference lexicon) words from the acoustic training data.'.format(num_oovs_with_acoustic_evidence, num_oovs), file=sys.stderr) + print(' Among those words whose counts in the training text > {}:'.format(threshold), file=sys.stderr) + print(' {} words\' selected prons came from G2P and phonetic-decoding.'.format(num_freq_oovs_from_both_sources), file=sys.stderr) + print(' {} words\' selected prons came from phonetic decoding only.'.format(num_freq_oovs_from_phonetic_decoding), file=sys.stderr) + print(' {} words\' selected prons came from G2P only.'.format(num_freq_oovs_from_g2p), file=sys.stderr) + print(' For those words whose counts in the training text <= {}:'.format(threshold), file=sys.stderr) + print(' {} words\' selected prons came from G2P and phonetic-decoding.'.format(num_infreq_oovs_from_both_sources), file=sys.stderr) + print(' {} words\' selected prons came from 
phonetic decoding only.'.format(num_infreq_oovs_from_phonetic_decoding), file=sys.stderr) + print(' {} words\' selected prons came from G2P only.'.format(num_infreq_oovs_from_g2p), file=sys.stderr) + +def WriteLearnedLexiconOov(learned_lexicon, ref_lexicon, file_handle): + for word, prons in learned_lexicon.iteritems(): + if word not in ref_lexicon: + for pron in prons: + print('{0} {1}'.format(word, pron), file=file_handle) + file_handle.close() + +def Main(): + args = GetArgs() + + # Read in three lexicon sources, word counts, and pron stats. + counts = ReadWordCounts(args.word_counts_file_handle) + ref_lexicon = ReadLexicon(args, args.ref_lexicon_handle, counts) + g2p_lexicon = ReadLexicon(args, args.g2p_lexicon_handle, counts) + pd_lexicon = ReadLexicon(args, args.pd_lexicon_handle, counts) + stats, stats_summed = ReadArcStats(args.arc_stats_file_handle) + learned_lexicon = ReadLexicon(args, args.learned_lexicon_handle, counts) + + # Write the learned prons for words out of the ref. vocab into learned_lexicon_oov. + WriteLearnedLexiconOov(learned_lexicon, ref_lexicon, args.learned_lexicon_oov_handle) + # Edits will be printed into ref_lexicon_edits, and the summary will be printed into stderr. + WriteEditsAndSummary(args, learned_lexicon, ref_lexicon, pd_lexicon, g2p_lexicon, counts, stats, stats_summed) + +if __name__ == "__main__": + Main() diff --git a/egs/wsj/s5/steps/dict/prons_to_lexicon.py b/egs/wsj/s5/steps/dict/prons_to_lexicon.py index 2a87d172602..37d7810411b 100755 --- a/egs/wsj/s5/steps/dict/prons_to_lexicon.py +++ b/egs/wsj/s5/steps/dict/prons_to_lexicon.py @@ -6,6 +6,7 @@ # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function +from collections import defaultdict import argparse import sys @@ -21,15 +22,15 @@ def __call__(self, parser, namespace, values, option_string=None): raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) def GetArgs(): - parser = argparse.ArgumentParser(description = "Converts pronunciation statistics (from phone level decoding) " - "into a lexicon for lexicon learning. We prune the pronunciations " + parser = argparse.ArgumentParser(description = "Converts pronunciation statistics (from phonetic decoding or g2p) " + "into a lexicon for. We prune the pronunciations " "based on a provided stats file, and optionally filter out entries which are present " "in a filter lexicon.", epilog = "e.g. steps/dict/prons_to_lexicon.py --min-prob=0.4 \\" "--filter-lexicon=exp/tri3_lex_0.4_work/phone_decode/filter_lexicon.txt \\" "exp/tri3_lex_0.4_work/phone_decode/prons.txt \\" "exp/tri3_lex_0.4_work/lexicon_phone_decoding.txt" - "See steps/dict/learn_lexicon.sh for examples in detail.") + "See steps/dict/learn_lexicon_greedy.sh for examples in detail.") parser.add_argument("--set-sum-to-one", type = str, default = False, action = StrToBoolAction, choices = ["true", "false"], @@ -39,6 +40,8 @@ def GetArgs(): action = StrToBoolAction, choices = ["true", "false"], help = "If normalize lexicon such that the max " "probability is 1.") + parser.add_argument("--top-N", type = int, default = 0, + help = "If non-zero, we just take the top N pronunciations (according to stats/pron-probs) for each word.") parser.add_argument("--min-prob", type = float, default = 0.1, help = "Remove pronunciation with probabilities less " "than this value after normalization.") @@ -46,8 +49,7 @@ def GetArgs(): help = "Exclude entries in this filter lexicon from the output lexicon." 
"each line must be ") parser.add_argument("stats_file", metavar='', type = str, - help = "Input file containing pronunciation statistics, representing how many times " - "each word-pronunciation appear in the phonetic decoding results." + help = "Input lexicon file containing pronunciation statistics/probs in the first column." "each line must be ") parser.add_argument("out_lexicon", metavar='', type = str, help = "Output lexicon.") @@ -150,6 +152,18 @@ def NormalizeLexicon(lexicon, set_max_to_one = True, prob = 0 lexicon[entry] = prob +def TakeTopN(lexicon, top_N): + lexicon_reshaped = defaultdict(list) + lexicon_pruned = {} + for entry, prob in lexicon.iteritems(): + lexicon_reshaped[entry[0]].append([entry[1], prob]) + for word in lexicon_reshaped: + prons = lexicon_reshaped[word] + sorted_prons = sorted(prons, reverse=True, key=lambda prons: prons[1]) + for i in range(len(sorted_prons)): + if i >= top_N: + lexicon[(word, sorted_prons[i][0])] = 0 + def WriteLexicon(args, lexicon, filter_lexicon): words = set() num_removed = 0 @@ -179,10 +193,15 @@ def Main(): word_probs = ConvertWordCountsToProbs(args, lexicon, word_count) lexicon = ConvertWordProbsToLexicon(word_probs) - filter_lexicon = ReadLexicon(args.filter_lexicon_handle) - NormalizeLexicon(lexicon, set_max_to_one = args.set_max_to_one, - set_sum_to_one = args.set_sum_to_one, - min_prob = args.min_prob) + filter_lexicon = set() + if args.filter_lexicon is not '': + filter_lexicon = ReadLexicon(args.filter_lexicon_handle) + if args.top_N > 0: + TakeTopN(lexicon, args.top_N) + else: + NormalizeLexicon(lexicon, set_max_to_one = args.set_max_to_one, + set_sum_to_one = args.set_sum_to_one, + min_prob = args.min_prob) WriteLexicon(args, lexicon, filter_lexicon) args.out_lexicon_handle.close() diff --git a/egs/wsj/s5/steps/dict/prune_pron_candidates.py b/egs/wsj/s5/steps/dict/prune_pron_candidates.py index affc5b17705..cd90a389a7c 100755 --- a/egs/wsj/s5/steps/dict/prune_pron_candidates.py +++ b/egs/wsj/s5/steps/dict/prune_pron_candidates.py @@ -4,6 +4,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division from collections import defaultdict import argparse import sys @@ -16,7 +17,7 @@ def GetArgs(): "(For words in the reference lexicon, N = # pron variants given by the reference" "lexicon; For oov words, N = avg. # pron variants per word in the reference lexicon)." "r is a user-specified constant, like 2.", - epilog = "See steps/dict/learn_lexicon.sh for example") + epilog = "See steps/dict/learn_lexicon_greedy.sh for example") parser.add_argument("--r", type = float, default = "2.0", help = "a user-specified ratio parameter which determines how many" @@ -61,7 +62,7 @@ def ReadStats(pron_stats_handle): phones = ' '.join(splits[2:]) stats[word].append((phones, count)) - for word, entry in stats.iteritems(): + for word, entry in stats.items(): entry.sort(key=lambda x: x[1]) return stats @@ -86,12 +87,12 @@ def PruneProns(args, stats, ref_lexicon): # Compute the average # pron variants counts per word in the reference lexicon. 
num_words_ref = 0 num_prons_ref = 0 - for word, prons in ref_lexicon.iteritems(): + for word, prons in ref_lexicon.items(): num_words_ref += 1 num_prons_ref += len(prons) avg_variants_counts_ref = math.ceil(float(num_prons_ref) / float(num_words_ref)) - for word, entry in stats.iteritems(): + for word, entry in stats.items(): if word in ref_lexicon: variants_counts = args.r * len(ref_lexicon[word]) else: @@ -105,7 +106,7 @@ def PruneProns(args, stats, ref_lexicon): except IndexError: break - for word, entry in stats.iteritems(): + for word, entry in stats.items(): for pron, prob in entry: if word not in ref_lexicon or pron not in ref_lexicon[word]: print('{0} {1}'.format(word, pron), file=args.pruned_prons_handle) diff --git a/egs/wsj/s5/steps/dict/select_prons_bayesian.py b/egs/wsj/s5/steps/dict/select_prons_bayesian.py index e728a4af0b8..893dd7cb818 100755 --- a/egs/wsj/s5/steps/dict/select_prons_bayesian.py +++ b/egs/wsj/s5/steps/dict/select_prons_bayesian.py @@ -4,6 +4,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division from collections import defaultdict import argparse import sys @@ -23,7 +24,7 @@ def GetArgs(): "a learned lexicon for words out of the ref. vocab (learned_lexicon_oov)," "and a lexicon_edits file containing suggested modifications of prons, for" "words within the ref. vocab (ref_lexicon_edits).", - epilog = "See steps/dict/learn_lexicon.sh for example.") + epilog = "See steps/dict/learn_lexicon_bayesian.sh for example.") parser.add_argument("--prior-mean", type = str, default = "0,0,0", help = "Mean of priors (summing up to 1) assigned to three exclusive n" "pronunciatio sources: reference lexicon, g2p, and phonetic decoding. We " @@ -162,7 +163,7 @@ def FilterPhoneticDecodingLexicon(args, phonetic_decoding_lexicon, stats): for line in args.silence_file_handle: silphones.add(line.strip()) rejected_candidates = set() - for word, prons in phonetic_decoding_lexicon.iteritems(): + for word, prons in phonetic_decoding_lexicon.items(): for pron in prons: for phone in pron.split(): if phone in silphones: @@ -194,7 +195,7 @@ def ComputePriorCounts(args, counts, ref_lexicon, g2p_lexicon, phonetic_decoding prior_mean[2] = 0 prior_mean_sum = sum(prior_mean) try: - prior_mean = [t / prior_mean_sum for t in prior_mean] + prior_mean = [float(t) / prior_mean_sum for t in prior_mean] except ZeroDivisionError: print('WARNING: word {} appears in train_counts but not in any lexicon.'.format(word), file=sys.stderr) prior_counts[word] = [t * args.prior_counts_tot for t in prior_mean] @@ -206,20 +207,20 @@ def ComputePosteriors(args, stats, ref_lexicon, g2p_lexicon, phonetic_decoding_l # The soft-counts were augmented by a user-specified prior count, according the source # (ref/G2P/phonetic-decoding) of this pronunciation. 
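A rough numerical illustration, with invented counts, of the augmentation described in the comment above: each candidate pronunciation's observed soft count is increased by an equal share of its source's prior count, and the augmented counts are then normalized into per-word posteriors.

    # Word "foo": two reference prons and one G2P pron; assume
    # prior_mean = [0.7, 0.2, 0.1] (ref, g2p, pd) and prior_counts_tot = 15,
    # giving per-source prior counts of [10.5, 3.0, 1.5].
    prior_counts = [10.5, 3.0, 1.5]
    observed = {'f uw': 6.0, 'f ow': 1.0, 'f uh': 2.5}     # soft counts from lattices

    augmented = {
        'f uw': prior_counts[0] / 2 + observed['f uw'],    # ref pron: 5.25 + 6.0 = 11.25
        'f ow': prior_counts[0] / 2 + observed['f ow'],    # ref pron: 5.25 + 1.0 = 6.25
        'f uh': prior_counts[1] / 1 + observed['f uh'],    # g2p pron: 3.0  + 2.5 = 5.5
    }
    total = sum(augmented.values())                        # 23.0
    posteriors = {pron: c / total for pron, c in augmented.items()}
    print(posteriors)  # {'f uw': ~0.489, 'f ow': ~0.272, 'f uh': ~0.239}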
- for word, prons in ref_lexicon.iteritems(): + for word, prons in ref_lexicon.items(): for pron in prons: # c is the augmented soft count (observed count + prior count) - c = prior_counts[word][0] / len(ref_lexicon[word]) + stats.get((word, pron), 0) + c = float(prior_counts[word][0]) / len(ref_lexicon[word]) + stats.get((word, pron), 0) posteriors[word].append((pron, c)) - for word, prons in g2p_lexicon.iteritems(): + for word, prons in g2p_lexicon.items(): for pron in prons: - c = prior_counts[word][1] / len(g2p_lexicon[word]) + stats.get((word, pron), 0) + c = float(prior_counts[word][1]) / len(g2p_lexicon[word]) + stats.get((word, pron), 0) posteriors[word].append((pron, c)) - for word, prons in phonetic_decoding_lexicon.iteritems(): + for word, prons in phonetic_decoding_lexicon.items(): for pron in prons: - c = prior_counts[word][2] / len(phonetic_decoding_lexicon[word]) + stats.get((word, pron), 0) + c = float(prior_counts[word][2]) / len(phonetic_decoding_lexicon[word]) + stats.get((word, pron), 0) posteriors[word].append((pron, c)) num_prons_from_ref = sum(len(ref_lexicon[i]) for i in ref_lexicon) @@ -239,10 +240,10 @@ def ComputePosteriors(args, stats, ref_lexicon, g2p_lexicon, phonetic_decoding_l # each entry is a pair: (prounciation, count) count_sum[word] = sum([entry[1] for entry in posteriors[word]]) - for word, entry in posteriors.iteritems(): + for word, entry in posteriors.items(): new_entry = [] for pron, count in entry: - post = count / count_sum[word] + post = float(count) / count_sum[word] new_entry.append((pron, post)) source = 'R' if word in g2p_lexicon and pron in g2p_lexicon[word]: @@ -260,7 +261,7 @@ def SelectPronsBayesian(args, counts, posteriors, ref_lexicon, g2p_lexicon, phon phonetic_decoding_selected = 0 learned_lexicon = defaultdict(set) - for word, entry in posteriors.iteritems(): + for word, entry in posteriors.items(): num_variants = 0 post_tot = 0.0 variants_counts = args.variants_counts @@ -411,7 +412,7 @@ def WriteEditsAndSummary(args, learned_lexicon, ref_lexicon, phonetic_decoding_l print(' {} words\' selected prons came from G2P only.'.format(num_infreq_oovs_from_g2p), file=sys.stderr) def WriteLearnedLexiconOov(learned_lexicon, ref_lexicon, file_handle): - for word, prons in learned_lexicon.iteritems(): + for word, prons in learned_lexicon.items(): if word not in ref_lexicon: for pron in prons: print('{0} {1}'.format(word, pron), file=file_handle) diff --git a/egs/wsj/s5/steps/dict/select_prons_greedy.py b/egs/wsj/s5/steps/dict/select_prons_greedy.py new file mode 100755 index 00000000000..cf71070e134 --- /dev/null +++ b/egs/wsj/s5/steps/dict/select_prons_greedy.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python + +# Copyright 2018 Xiaohui Zhang +# Apache 2.0. + +from __future__ import print_function +from collections import defaultdict +import argparse +import sys +import math + +def GetArgs(): + parser = argparse.ArgumentParser( + description = "Use a greedy framework to select pronunciation candidates" + "from three sources: a reference lexicon, G2P lexicon and phonetic-decoding" + "(PD) lexicon. Basically, this script implements the Alg. 1 in the paper:" + "Acoustic data-driven lexicon learning based on a greedy pronunciation " + "selection framework, by X. Zhang, V. Mahonar, D. Povey and S. Khudanpur," + "Interspeech 2017. The inputs are an arc-stats file, containing " + "acoustic evidence (tau_{uwb} in the paper) and three source lexicons " + "(phonetic-decoding(PD)/G2P/ref). 
The outputs is the learned lexicon for" + "all words in the arc_stats (acoustic evidence) file.", + epilog = "See steps/dict/learn_lexicon_greedy.sh for example.") + parser.add_argument("--alpha", type = str, default = "0,0,0", + help = "Scaling factors for the likelihood reduction threshold." + "of three pronunciaiton candidate sources: phonetic-decoding (PD)," + "G2P and reference. The valid range of each dimension is [0, 1], and" + "a large value means we prune pronunciations from this source more" + "aggressively. Setting a dimension to zero means we never want to remove" + "pronunciaiton from that source. See Section 4.3 in the paper for details.") + parser.add_argument("--beta", type = str, default = "0,0,0", + help = "smoothing factors for the likelihood reduction term." + "of three pronunciaiton candidate sources: phonetic-decoding (PD)," + "G2P and reference. The valid range of each dimension is [0, 100], and" + "a large value means we prune pronunciations from this source more" + "aggressively. See Section 4.3 in the paper for details.") + parser.add_argument("--delta", type = float, default = 0.000000001, + help = "Floor value of the pronunciation posterior statistics." + "The valid range is (0, 0.01)," + "See Section 3 in the paper for details.") + parser.add_argument("silence_phones_file", metavar = "", type = str, + help = "File containing a list of silence phones.") + parser.add_argument("arc_stats_file", metavar = "", type = str, + help = "File containing word-pronunciation statistics obtained from lattices; " + "each line must be ") + parser.add_argument("word_counts_file", metavar = "", type = str, + help = "File containing word counts in acoustic training data; " + "each line must be .") + parser.add_argument("ref_lexicon", metavar = "", type = str, + help = "The reference lexicon (most probably hand-derived)." + "Each line must be ") + parser.add_argument("g2p_lexicon", metavar = "", type = str, + help = "Candidate ronouciations from G2P results." + "Each line must be ") + parser.add_argument("pd_lexicon", metavar = "", type = str, + help = "Candidate ronouciations from phonetic decoding results." + "Each line must be ") + parser.add_argument("learned_lexicon", metavar = "", type = str, + help = "Learned lexicon.") + + + print (' '.join(sys.argv), file=sys.stderr) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + args.silence_phones_file_handle = open(args.silence_phones_file) + if args.arc_stats_file == "-": + args.arc_stats_file_handle = sys.stdin + else: + args.arc_stats_file_handle = open(args.arc_stats_file) + args.word_counts_file_handle = open(args.word_counts_file) + args.ref_lexicon_handle = open(args.ref_lexicon) + args.g2p_lexicon_handle = open(args.g2p_lexicon) + args.pd_lexicon_handle = open(args.pd_lexicon) + args.learned_lexicon_handle = open(args.learned_lexicon, "w") + + alpha = args.alpha.strip().split(',') + if len(alpha) is not 3: + raise Exception('Invalid alpha ', args.alpha) + for i in range(0,3): + if float(alpha[i]) < 0 or float(alpha[i]) > 1: + raise Exception('alaph ', alpha[i], + ' is invalid, it must be within [0, 1].') + if float(alpha[i]) == 0: + alpha[i] = -1e-3 + # The absolute likelihood loss (search for loss_abs) is supposed to be positive. + # But it could be negative near zero because of numerical precision limit. 
+ # In this case, even if alpha is set to be zero, which means we never want to + # remove pronunciation from that source, the quality score (search for q_b) + # could still be negative, which means this pron could be potentially removed. + # To prevent this, we set alpha as a negative value near zero to ensure + # q_b is always positive. + + args.alpha = [float(alpha[0]), float(alpha[1]), float(alpha[2])] + print("[alpha_{pd}, alpha_{g2p}, alpha_{ref}] is: ", args.alpha) + exit + beta = args.beta.strip().split(',') + if len(beta) is not 3: + raise Exception('Invalid beta ', args.beta) + for i in range(0,3): + if float(beta[i]) < 0 or float(beta[i]) > 100: + raise Exception('beta ', beta[i], + ' is invalid, it must be within [0, 100].') + args.beta = [float(beta[0]), float(beta[1]), float(beta[2])] + print("[beta_{pd}, beta_{g2p}, beta_{ref}] is: ", args.beta) + + if args.delta <= 0 or args.delta > 0.1: + raise Exception('delta ', args.delta, ' is invalid, it must be within' + '(0, 0.01).') + print("delta is: ", args.delta) + + return args + +def ReadArcStats(arc_stats_file_handle): + stats = defaultdict(lambda : defaultdict(dict)) + stats_summed = defaultdict(float) + for line in arc_stats_file_handle.readlines(): + splits = line.strip().split() + + if (len(splits) == 0): + continue + + if (len(splits) < 5): + raise Exception('Invalid format of line ' + line + + ' in ' + arc_stats_file) + utt = splits[1] + start_frame = int(splits[2]) + word = splits[0] + count = float(splits[3]) + phones = splits[4:] + phones = ' '.join(phones) + stats[word][(utt, start_frame)][phones] = count + stats_summed[(word, phones)] += count + return stats, stats_summed + +def ReadWordCounts(word_counts_file_handle): + counts = {} + for line in word_counts_file_handle.readlines(): + splits = line.strip().split() + if len(splits) < 2: + raise Exception('Invalid format of line ' + line + + ' in counts file.') + word = splits[0] + count = int(splits[1]) + counts[word] = count + return counts + +def ReadLexicon(args, lexicon_file_handle, counts): + # we're skipping any word not in counts (not seen in training data), + # cause we're only learning prons for words who have acoustic examples. + lexicon = defaultdict(set) + for line in lexicon_file_handle.readlines(): + splits = line.strip().split() + if len(splits) == 0: + continue + if len(splits) < 2: + raise Exception('Invalid format of line ' + line + + ' in lexicon file.') + word = splits[0] + if word not in counts: + continue + phones = ' '.join(splits[1:]) + lexicon[word].add(phones) + return lexicon + +def FilterPhoneticDecodingLexicon(args, pd_lexicon): + # We want to remove all candidates which contain silence phones + silphones = set() + for line in args.silence_phones_file_handle: + silphones.add(line.strip()) + rejected_candidates = set() + for word, prons in pd_lexicon.iteritems(): + for pron in prons: + for phone in pron.split(): + if phone in silphones: + rejected_candidates.add((word, pron)) + break + for word, pron in rejected_candidates: + pd_lexicon[word].remove(pron) + return pd_lexicon + +# One iteration of Expectation-Maximization computation (Eq. 3-4 in the paper). 
+def OneEMIter(args, word, stats, prons, pron_probs, debug=False): + prob_acc = [0.0 for i in range(len(prons[word]))] + s = sum(pron_probs) + for i in range(len(pron_probs)): + pron_probs[i] = pron_probs[i] / s + log_like = 0.0 + for (utt, start_frame) in stats[word]: + prob = [] + soft_counts = [] + for i in range(len(prons[word])): + phones = prons[word][i] + soft_count = stats[word][(utt, start_frame)].get(phones, 0) + if soft_count < args.delta: + soft_count = args.delta + soft_counts.append(soft_count) + prob = [i[0] * i[1] for i in zip(soft_counts, pron_probs)] + for i in range(len(prons[word])): + prob_acc[i] += prob[i] / sum(prob) + log_like += math.log(sum(prob)) + pron_probs = [1.0 / float(len(stats[word])) * p for p in prob_acc] + log_like = 1.0 / float(len(stats[word])) * log_like + if debug: + print("Log_like of the word: ", log_like, "pron probs: ", pron_probs) + return pron_probs, log_like + +def SelectPronsGreedy(args, stats, counts, ref_lexicon, g2p_lexicon, pd_lexicon, dianostic_info=False): + prons = defaultdict(list) # Put all possible prons from three source lexicons into this dictionary + src = {} # Source of each (word, pron) pair: 'P' = phonetic-decoding, 'G' = G2P, 'R' = reference + learned_lexicon = defaultdict(set) # Put all selected prons in this dictionary + for lexicon in ref_lexicon, g2p_lexicon, pd_lexicon: + for word in lexicon: + for pron in lexicon[word]: + prons[word].append(pron) + for word in prons: + for pron in prons[word]: + if word in pd_lexicon and pron in pd_lexicon[word]: + src[(word, pron)] = 'P' + if word in g2p_lexicon and pron in g2p_lexicon[word]: + src[(word, pron)] = 'G' + if word in ref_lexicon and pron in ref_lexicon[word]: + src[(word, pron)] = 'R' + + for word in prons: + if word not in stats: + continue + n = len(prons[word]) + pron_probs = [1/float(n) for i in range(n)] + if dianostic_info: + print("pronunciations of word '{}': {}".format(word, prons[word])) + active_indexes = set(range(len(prons[word]))) + + deleted_prons = [] # indexes of prons to be deleted + soft_counts_normalized = [] + while len(active_indexes) > 1: + log_like = 1.0 + log_like_last = -1.0 + num_iters = 0 + while abs(log_like - log_like_last) > 1e-7: + num_iters += 1 + log_like_last = log_like + pron_probs, log_like = OneEMIter(args, word, stats, prons, pron_probs, False) + if log_like_last == 1.0 and len(soft_counts_normalized) == 0: # the first iteration + soft_counts_normalized = pron_probs + if dianostic_info: + print("Avg.(over all egs) soft counts: {}".format(soft_counts_normalized)) + if dianostic_info: + print("\n Log_like after {} iters of EM: {}, estimated pron_probs: {} \n".format( + num_iters, log_like, pron_probs)) + candidates_to_delete = [] + + for i in active_indexes: + pron_probs_mod = [p for p in pron_probs] + pron_probs_mod[i] = 0.0 + for j in range(len(pron_probs_mod)): + if j in active_indexes and j != i: + pron_probs_mod[j] += 0.01 + pron_probs_mod = [s / sum(pron_probs_mod) for s in pron_probs_mod] + log_like2 = 1.0 + log_like2_last = -1.0 + num_iters2 = 0 + # Running EM until convengence + while abs(log_like2 - log_like2_last) > 0.001 : + num_iters2 += 1 + log_like2_last = log_like2 + pron_probs_mod, log_like2 = OneEMIter(args, word, stats, + prons, pron_probs_mod, False) + + loss_abs = log_like - log_like2 # absolute likelihood loss before normalization + # (supposed to be positive, but could be negative near zero because of numerical precision limit). 
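A self-contained toy rendering, with made-up numbers, of the per-word EM update that OneEMIter above implements: the E-step weights each example's soft counts by the current pronunciation probabilities and normalizes per example, and the M-step averages the resulting per-example posteriors.

    # Three training examples of one word, two candidate prons; each row holds
    # the per-example soft counts (tau_{uwb} in the paper).
    soft_counts = [[0.9, 0.1], [0.6, 0.4], [0.2, 0.8]]
    pron_probs = [0.5, 0.5]                    # uniform initialization

    for _ in range(100):                       # iterate to (near) convergence
        acc = [0.0, 0.0]
        for counts in soft_counts:
            joint = [c * p for c, p in zip(counts, pron_probs)]
            total = sum(joint)
            acc = [a + j / total for a, j in zip(acc, joint)]
        pron_probs = [a / len(soft_counts) for a in acc]

    print(pron_probs)                          # converges to roughly [0.70, 0.30]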
+ log_delta = math.log(args.delta) + thr = -log_delta + loss = loss_abs + source = src[(word, prons[word][i])] + if dianostic_info: + print("\n set the pron_prob of '{}' whose source is {}, to zero results in {}" + " loss in avg. log-likelihood; Num. iters until converging:{}. ".format( + prons[word][i], source, loss, num_iters2)) + # Compute quality score q_b = loss_abs * / (M_w + beta_s(b)) + alpha_s(b) * log_delta + # See Sec. 4.3 and Alg. 1 in the paper. + if source == 'P': + thr *= args.alpha[0] + loss *= float(len(stats[word])) / (float(len(stats[word])) + args.beta[0]) + if source == 'G': + thr *= args.alpha[1] + loss *= float(len(stats[word])) / (float(len(stats[word])) + args.beta[1]) + if source == 'R': + thr *= args.alpha[2] + loss *= float(len(stats[word])) / (float(len(stats[word])) + args.beta[2]) + if loss - thr < 0: # loss - thr here is just q_b + if dianostic_info: + print("Smoothed log-like loss {} is smaller than threshold {} so that the quality" + "score {} is negative, adding the pron to the list of candidates to delete" + ". ".format(loss, thr, loss-thr)) + candidates_to_delete.append((loss-thr, i)) + if len(candidates_to_delete) == 0: + break + candidates_to_delete_sorted = sorted(candidates_to_delete, + key=lambda candidates_to_delete: candidates_to_delete[0]) + + deleted_candidate = candidates_to_delete_sorted[0] + active_indexes.remove(deleted_candidate[1]) + pron_probs[deleted_candidate[1]] = 0.0 + for i in range(len(pron_probs)): + if i in active_indexes: + pron_probs[i] += 0.01 + pron_probs = [s / sum(pron_probs) for s in pron_probs] + source = src[(word, prons[word][deleted_candidate[1]])] + pron = prons[word][deleted_candidate[1]] + soft_count = soft_counts_normalized[deleted_candidate[1]] + quality_score = deleted_candidate[0] + # This part of diagnostic info provides hints to the user on how to adjust the parameters. + if dianostic_info: + print("removed pron {}, from source {} with quality score {:.5f}".format( + pron, source, quality_score)) + if (source == 'P' and soft_count > 0.7 and len(stats[word]) > 5): + print("WARNING: alpha_{pd} or beta_{pd} may be too large!" + " For the word '{}' whose count is {}, the candidate " + " pronunciation from phonetic decoding '{}' with normalized " + " soft count {} (out of 1) is rejected. It shouldn't have been" + " rejected if alpha_{pd} is smaller than {}".format( + word, len(stats[word]), pron, soft_count, -loss / log_delta, + -args.alpha[0] * len(stats[word]) + (objf_change + args.beta[0])), + file=sys.stderr) + if loss_abs > thr: + print(" or beta_{pd} is smaller than {}".format( + (loss_abs / thr - 1) * len(stats[word])), file=sys.stderr) + if (source == 'G' and soft_count > 0.7 and len(stats[word]) > 5): + print("WARNING: alpha_{g2p} or beta_{g2p} may be too large!" + " For the word '{}' whose count is {}, the candidate " + " pronunciation from G2P '{}' with normalized " + " soft count {} (out of 1) is rejected. 
It shouldn't have been" + " rejected if alpha_{g2p} is smaller than {} ".format( + word, len(stats[word]), pron, soft_count, -loss / log_delta, + -args.alpha[1] * len(stats[word]) + (objf_change + args.beta[1])), + file=sys.stderr) + if loss_abs > thr: + print(" or beta_{g2p} is smaller than {}.".format(( + loss_abs / thr - 1) * len(stats[word])), file=sys.stderr) + deleted_prons.append(deleted_candidate[1]) + for i in range(len(prons[word])): + if i not in deleted_prons: + learned_lexicon[word].add(prons[word][i]) + + return learned_lexicon + +def WriteLearnedLexicon(learned_lexicon, file_handle): + for word, prons in learned_lexicon.iteritems(): + for pron in prons: + print('{0} {1}'.format(word, pron), file=file_handle) + file_handle.close() + +def Main(): + args = GetArgs() + + # Read in three lexicon sources, word counts, and pron stats. + counts = ReadWordCounts(args.word_counts_file_handle) + ref_lexicon = ReadLexicon(args, args.ref_lexicon_handle, counts) + g2p_lexicon = ReadLexicon(args, args.g2p_lexicon_handle, counts) + pd_lexicon = ReadLexicon(args, args.pd_lexicon_handle, counts) + stats, stats_summed = ReadArcStats(args.arc_stats_file_handle) + pd_lexicon = FilterPhoneticDecodingLexicon(args, pd_lexicon) + + # Select prons to construct the learned lexicon. + learned_lexicon = SelectPronsGreedy(args, stats, counts, ref_lexicon, g2p_lexicon, pd_lexicon) + + # Write the learned prons for words out of the ref. vocab into learned_lexicon_oov. + WriteLearnedLexicon(learned_lexicon, args.learned_lexicon_handle) + +if __name__ == "__main__": + Main() diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py index 503721c23d1..6bf0ea4932c 100644 --- a/egs/wsj/s5/steps/libs/common.py +++ b/egs/wsj/s5/steps/libs/common.py @@ -10,6 +10,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import logging import math @@ -316,7 +317,7 @@ def read_kaldi_matrix(matrix_file): 'matrix_file' and stores it as a list of rows, where each row is a list. 
""" try: - lines = map(lambda x: x.split(), open(matrix_file).readlines()) + lines = [x.split() for x in open(matrix_file).readlines()] first_field = lines[0][0] last_field = lines[-1][-1] lines[0] = lines[0][1:] @@ -326,7 +327,7 @@ def read_kaldi_matrix(matrix_file): "Kaldi matrix file has incorrect format, " "only text format matrix files can be read by this script") for i in range(len(lines)): - lines[i] = map(lambda x: int(float(x)), lines[i]) + lines[i] = [int(float(x)) for x in lines[i]] return lines except IOError: raise Exception("Error while reading the kaldi matrix file " @@ -348,7 +349,7 @@ def write_kaldi_matrix(output_file, matrix): if num_cols != len(matrix[row_index]): raise Exception("All the rows of a matrix are expected to " "have the same length") - f.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + f.write(" ".join([str(x) for x in matrix[row_index]])) if row_index != num_rows - 1: f.write("\n") f.write(" ]") @@ -508,7 +509,7 @@ def compute_idct_matrix(K, N, cepstral_lifter=0): lifter_coeffs = compute_lifter_coeffs(cepstral_lifter, K) for k in range(0, K): for n in range(0, N): - matrix[n][k] = matrix[n][k] / lifter_coeffs[k] + matrix[n][k] = float(matrix[n][k]) / lifter_coeffs[k] return matrix diff --git a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py index 1afc26ff163..97da5e04962 100755 --- a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py +++ b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py @@ -322,7 +322,7 @@ def parse_progress_logs_for_param_diff(exp_dir, pattern): groups = mat_obj.groups() iteration = groups[0] differences = parse_difference_string(groups[1]) - component_names = component_names.union(differences.keys()) + component_names = component_names.union(list(differences.keys())) progress_per_iter[int(iteration)] = differences component_names = list(component_names) @@ -435,14 +435,14 @@ def parse_prob_logs(exp_dir, key='accuracy', output="output"): raise KaldiLogParseException("Could not find any lines with {k} in " " {l}".format(k=key, l=valid_prob_files)) - iters = list(set(valid_objf.keys()).intersection(train_objf.keys())) + iters = list(set(valid_objf.keys()).intersection(list(train_objf.keys()))) if not iters: raise KaldiLogParseException("Could not any common iterations with" " key {k} in both {tl} and {vl}".format( k=key, tl=train_prob_files, vl=valid_prob_files)) iters.sort() - return list(map(lambda x: (int(x), float(train_objf[x]), - float(valid_objf[x])), iters)) + return list([(int(x), float(train_objf[x]), + float(valid_objf[x])) for x in iters]) def parse_rnnlm_prob_logs(exp_dir, key='objf'): train_prob_files = "%s/log/train.*.*.log" % (exp_dir) @@ -498,14 +498,14 @@ def parse_rnnlm_prob_logs(exp_dir, key='objf'): raise KaldiLogParseException("Could not find any lines with {k} in " " {l}".format(k=key, l=valid_prob_files)) - iters = list(set(valid_objf.keys()).intersection(train_objf.keys())) + iters = list(set(valid_objf.keys()).intersection(list(train_objf.keys()))) if not iters: raise KaldiLogParseException("Could not any common iterations with" " key {k} in both {tl} and {vl}".format( k=key, tl=train_prob_files, vl=valid_prob_files)) iters.sort() - return map(lambda x: (int(x), float(train_objf[x]), - float(valid_objf[x])), iters) + return [(int(x), float(train_objf[x]), + float(valid_objf[x])) for x in iters] diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 
6afb43824fd..c932a9c54f7 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -7,6 +7,8 @@ """ This is a module with methods which will be used by scripts for training of deep neural network acoustic model with chain objective. """ +from __future__ import division +from __future__ import print_function import logging import math @@ -413,8 +415,7 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, rand_prune=rand_prune)) # the above command would have generated dir/{1..num_lda_jobs}.lda_stats - lda_stat_files = list(map(lambda x: '{0}/{1}.lda_stats'.format(dir, x), - range(1, num_lda_jobs + 1))) + lda_stat_files = ['{0}/{1}.lda_stats'.format(dir, x) for x in range(1, num_lda_jobs + 1)] common_lib.execute_command( """{command} {dir}/log/sum_transform_stats.log \ diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index a2892a090f3..1a038cc23f2 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -7,6 +7,7 @@ """This module contains classes and methods common to training of nnet3 neural networks. """ +from __future__ import division import argparse import glob @@ -528,13 +529,13 @@ def smooth_presoftmax_prior_scale_vector(pdf_counts, presoftmax_prior_scale_power=-0.25, smooth=0.01): total = sum(pdf_counts) - average_count = total/len(pdf_counts) + average_count = float(total) / len(pdf_counts) scales = [] for i in range(len(pdf_counts)): scales.append(math.pow(pdf_counts[i] + smooth * average_count, presoftmax_prior_scale_power)) num_pdfs = len(pdf_counts) - scaled_counts = list(map(lambda x: x * float(num_pdfs) / sum(scales), scales)) + scaled_counts = [x * float(num_pdfs) / sum(scales) for x in scales] return scaled_counts @@ -564,7 +565,7 @@ def get_model_combine_iters(num_iters, num_epochs, in the final model-averaging phase. (note: it's a weighted average where the weights are worked out from a subset of training data.)""" - approx_iters_per_epoch_final = num_archives/num_jobs_final + approx_iters_per_epoch_final = float(num_archives) / num_jobs_final # Note: it used to be that we would combine over an entire epoch, # but in practice we very rarely would use any weights from towards # the end of that range, so we are changing it to use not @@ -581,8 +582,8 @@ def get_model_combine_iters(num_iters, num_epochs, # But if this value is > max_models_combine, then the models # are subsampled to get these many models to combine. 
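A small sketch, with invented values, of the exponentially decaying schedule computed by the get_learning_rate hunk just below: the effective rate decays geometrically from the initial to the final value over num_archives_to_process archives (the actual script then scales this by the current number of jobs).

    import math

    # Invented values.
    initial_effective_lrate = 0.001
    final_effective_lrate = 0.0001
    num_archives_to_process = 1000

    def effective_lrate(num_archives_processed):
        return initial_effective_lrate * math.exp(
            num_archives_processed
            * math.log(float(final_effective_lrate) / initial_effective_lrate)
            / num_archives_to_process)

    print(effective_lrate(0))     # 0.001
    print(effective_lrate(500))   # ~0.000316 (the geometric midpoint)
    print(effective_lrate(1000))  # ~0.0001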
- num_iters_combine_initial = min(approx_iters_per_epoch_final/2 + 1, - num_iters/2) + num_iters_combine_initial = min(int(approx_iters_per_epoch_final/2) + 1, + int(num_iters/2)) if num_iters_combine_initial > max_models_combine: subsample_model_factor = int( @@ -610,8 +611,7 @@ def get_learning_rate(iter, num_jobs, num_iters, num_archives_processed, effective_learning_rate = ( initial_effective_lrate * math.exp(num_archives_processed - * math.log(final_effective_lrate - / initial_effective_lrate) + * math.log(float(final_effective_lrate) / initial_effective_lrate) / num_archives_to_process)) return num_jobs * effective_learning_rate diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index cc5c9693a12..f2722350e41 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -348,8 +348,7 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, rand_prune=rand_prune)) # the above command would have generated dir/{1..num_lda_jobs}.lda_stats - lda_stat_files = list(map(lambda x: '{0}/{1}.lda_stats'.format(dir, x), - range(1, num_lda_jobs + 1))) + lda_stat_files = ['{0}/{1}.lda_stats'.format(dir, x) for x in range(1, num_lda_jobs + 1)] common_lib.execute_command( """{command} {dir}/log/sum_transform_stats.log \ diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py index e870c1a60cf..db4cb392f10 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py @@ -6,6 +6,7 @@ """ from __future__ import print_function +from __future__ import division import math import re import sys diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 9a856bc6fe1..7846c983b19 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -9,6 +9,7 @@ """ from __future__ import print_function +from __future__ import division import math import re import sys diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/convolution.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/convolution.py index be8bcaefedf..5597ff0e216 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/convolution.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/convolution.py @@ -7,6 +7,7 @@ """ This module has the implementation of convolutional layers. 
""" from __future__ import print_function +from __future__ import division import math import re import sys @@ -880,7 +881,7 @@ def _generate_normal_resblock_config(self): num_filters_out = self.config['num-filters'] if height_out != height_in: - if height_out < height_in / 2 - 1 or height_out > height_in / 2 + 1: + if height_out < height_in / 2 - 1 or height_out > height_in / 2 + 1: raise RuntimeError("Expected height-out to be about half height-in, or the same: " "height-in={0} height-out={1}".format(height_in, height_out)) if not time_period_out % 2 == 0: @@ -1030,7 +1031,7 @@ def _generate_bottleneck_resblock_config(self): num_filters_out = self.config['num-filters'] if height_out != height_in: - if height_out < height_in / 2 - 1 or height_out > height_in / 2 + 1: + if height_out < height_in / 2 - 1 or height_out > height_in / 2 + 1: raise RuntimeError("Expected height-out to be about half height-in, or the same: " "height-in={0} height-out={1}".format(height_in, height_out)) height_subsample = 2 diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index ede0201f572..5ac2ed59003 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -81,6 +81,7 @@ 'linear-component': xlayers.XconfigLinearComponent, 'affine-component': xlayers.XconfigAffineComponent, 'scale-component': xlayers.XconfigPerElementScaleComponent, + 'dim-range-component': xlayers.XconfigDimRangeComponent, 'offset-component': xlayers.XconfigPerElementOffsetComponent, 'combine-feature-maps-layer': xlayers.XconfigCombineFeatureMapsLayer } diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/trivial_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/trivial_layers.py index f91258bab04..2728ad40639 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/trivial_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/trivial_layers.py @@ -580,3 +580,67 @@ def _generate_config(self): self.name, input_desc)) configs.append(line) return configs + + +class XconfigDimRangeComponent(XconfigLayerBase): + """This class is for parsing lines like + 'dim-range-component name=feature1 input=Append(-3,0,3) dim=40 dim-offset=0' + which will produce just a single component, of part of the input. + Parameters of the class, and their defaults: + input='[-1]' [Descriptor giving the input of the layer.] + dim=-1 [Dimension of the output.] + dim-offset=0 [Dimension offset of the input.] 
+ """ + def __init__(self, first_token, key_to_value, prev_names=None): + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def set_default_configs(self): + self.config = {'input': '[-1]', + 'dim': -1, + 'dim-offset': 0 } + + def check_configs(self): + input_dim = self.descriptors['input']['dim'] + if self.config['dim'] <= 0: + raise RuntimeError("'dim' must be specified and > 0.") + elif self.config['dim'] > input_dim: + raise RuntimeError("'dim' must be specified and lower than the input dim.") + if self.config['dim-offset'] < 0 : + raise RuntimeError("'dim-offset' must be specified and >= 0.") + elif self.config['dim-offset'] + self.config['dim'] > input_dim: + raise RuntimeError("'dim-offset' plus output dim must be lower than the input dim.") + + def output_name(self, auxiliary_output=None): + assert auxiliary_output is None + return self.name + + def output_dim(self, auxiliary_output=None): + assert auxiliary_output is None + output_dim = self.config['dim'] + if output_dim <= 0: + self.config['dim'] = self.descriptors['input']['dim'] + return output_dim + + def get_full_config(self): + ans = [] + config_lines = self._generate_config() + + for line in config_lines: + for config_name in ['ref', 'final']: + # we do not support user specified matrices in this layer + # so 'ref' and 'final' configs are the same. + ans.append((config_name, line)) + return ans + + def _generate_config(self): + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + input_node = self.descriptors['input']['final-string'] + output_dim = self.config['dim'] + dim_offset = self.config['dim-offset'] + + configs = [] + line = ('dim-range-node name={0} input-node={1} dim={2} dim-offset={3}'.format( + self.name, input_node, output_dim, dim_offset)) + configs.append(line) + return configs diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py index 08de18167cd..0188248d694 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py @@ -184,7 +184,7 @@ def convert_value_to_type(key, dest_type, string_value): # Also, in any place a raw input/layer/output name can appear, we accept things # like [-1] meaning the previous input/layer/output's name, or [-2] meaning the # last-but-one input/layer/output, and so on. 
-class Descriptor: +class Descriptor(object): def __init__(self, descriptor_string = None, prev_names = None): @@ -595,7 +595,7 @@ def parse_config_line(orig_config_line): rest_of_line = ' '.join(fields) # rest of the line can be of the form 'a=1 b=" x=1 y=2 " c=Append( i1, i2)' - positions = list(map(lambda x: x.start(), re.finditer('"', rest_of_line))) + positions = [x.start() for x in re.finditer('"', rest_of_line)] if not len(positions) % 2 == 0: raise RuntimeError("Double-quotes should occur in pairs") diff --git a/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py b/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py index 6e7bff3fa17..b5338b516e8 100755 --- a/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py +++ b/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py @@ -4,14 +4,16 @@ # Creates the nnet.config and hidde_*.config scripts used in train_pnorm_multisplice.sh # Parses the splice string to generate relevant variables for get_egs.sh, get_lda.sh and nnet/hidden.config files +from __future__ import division +from __future__ import print_function import re, argparse, sys, math, warnings # returns the set of frame indices required to perform the convolution # between sequences with frame indices in x and y def get_convolution_index_set(x, y): z = [] - for i in xrange(len(x)): - for j in xrange(len(y)): + for i in range(len(x)): + for j in range(len(y)): z.append(x[i]+y[j]) z = list(set(z)) z.sort() @@ -19,7 +21,7 @@ def get_convolution_index_set(x, y): def parse_splice_string(splice_string): layerwise_splice_indexes = splice_string.split('layer')[1:] - print splice_string.split('layer') + print(splice_string.split('layer')) contexts={} first_right_context = 0 # default value first_left_context = 0 # default value @@ -29,14 +31,14 @@ def parse_splice_string(splice_string): try: for cur_splice_indexes in layerwise_splice_indexes: layer_index, frame_indexes = cur_splice_indexes.split("/") - frame_indexes = map(lambda x: int(x), frame_indexes.split(':')) + frame_indexes = [int(x) for x in frame_indexes.split(':')] layer_index = int(layer_index) assert(layer_index >= 0) if layer_index == 0: first_left_context = min(frame_indexes) first_right_context = max(frame_indexes) try: - assert(frame_indexes == range(first_left_context, first_right_context+1)) + assert(frame_indexes == list(range(first_left_context, first_right_context+1))) except AssertionError: raise Exception('Currently the first splice component just accepts contiguous context.') try: @@ -46,11 +48,11 @@ def parse_splice_string(splice_string): left context provided is %d and right context provided is %d.""" % (first_left_context, first_right_context)) # convolve the current splice indices with the splice indices until last layer nnet_frame_indexes = get_convolution_index_set(frame_indexes, nnet_frame_indexes) - cur_context = ":".join(map(lambda x: str(x), frame_indexes)) + cur_context = ":".join([str(x) for x in frame_indexes]) contexts[layer_index] = cur_context except ValueError: raise Exception('Unknown format in splice_indexes variable: {0}'.format(params.splice_indexes)) - print nnet_frame_indexes + print(nnet_frame_indexes) max_left_context = min(nnet_frame_indexes) max_right_context = max(nnet_frame_indexes) return [contexts, ' nnet_left_context={0};\n nnet_right_context={1}\n first_left_context={2};\n first_right_context={3}\n'.format(abs(max_left_context), abs(max_right_context), abs(first_left_context), abs(first_right_context) )] @@ -87,7 +89,7 @@ def create_config_files(output_dir, params): except KeyError: raise 
Exception('A splice layer is expected to be the first layer. Provide a context for the first layer.') - for i in xrange(1, params.num_hidden_layers): #just run till num_hidden_layers-1 since we do not add splice before the final affine transform + for i in range(1, params.num_hidden_layers): #just run till num_hidden_layers-1 since we do not add splice before the final affine transform lines=[] context_len = 1 if i in contexts: @@ -109,7 +111,7 @@ def create_config_files(output_dir, params): if __name__ == "__main__": - print " ".join(sys.argv) + print(" ".join(sys.argv)) parser = argparse.ArgumentParser() parser.add_argument('--splice-indexes', type=str, help='string specifying the indexes for the splice layers throughout the network') parser.add_argument('--total-input-dim', type=int, help='dimension of the input to the network') @@ -127,7 +129,7 @@ def create_config_files(output_dir, params): parser.add_argument("output_dir", type=str, help="output directory to store the files") params = parser.parse_args() - print params + print(params) if params.mode == "contexts": [context, context_variables] = parse_splice_string(params.splice_indexes) var_file = open("{0}/vars".format(params.output_dir), "w") diff --git a/egs/wsj/s5/steps/nnet3/chain/e2e/text_to_phones.py b/egs/wsj/s5/steps/nnet3/chain/e2e/text_to_phones.py index 0ff05e3c48e..2c51cb57750 100755 --- a/egs/wsj/s5/steps/nnet3/chain/e2e/text_to_phones.py +++ b/egs/wsj/s5/steps/nnet3/chain/e2e/text_to_phones.py @@ -8,6 +8,7 @@ to phone transcriptions using the provided lexicon, and writes them to standard output. """ +from __future__ import print_function import argparse from os.path import join diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index a832f57cd8f..40b65afe273 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -6,6 +6,8 @@ """ This script is based on steps/nnet3/chain/train.sh """ +from __future__ import division +from __future__ import print_function import argparse import logging diff --git a/egs/wsj/s5/steps/nnet3/components.py b/egs/wsj/s5/steps/nnet3/components.py index 34443d586ca..8e879579776 100644 --- a/egs/wsj/s5/steps/nnet3/components.py +++ b/egs/wsj/s5/steps/nnet3/components.py @@ -84,7 +84,7 @@ def AddBlockAffineLayer(config_lines, name, input, output_dim, num_blocks): def AddPermuteLayer(config_lines, name, input, column_map): components = config_lines['components'] component_nodes = config_lines['component-nodes'] - permute_indexes = ",".join(map(lambda x: str(x), column_map)) + permute_indexes = ",".join([str(x) for x in column_map]) components.append('component name={0}_permute type=PermuteComponent column-map={1}'.format(name, permute_indexes)) component_nodes.append('component-node name={0}_permute component={0}_permute input={1}'.format(name, input['descriptor'])) diff --git a/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py b/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py index f0a4341d12b..66ff633fbfc 100755 --- a/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py +++ b/egs/wsj/s5/steps/nnet3/convert_nnet2_to_nnet3.py @@ -6,6 +6,7 @@ # It requires knowledge of valid components which # can be modified in the configuration section below. 
+from __future__ import print_function import argparse, os, tempfile, logging, sys, shutil, fileinput, re from collections import defaultdict, namedtuple import numpy as np @@ -51,7 +52,7 @@ SPLICE_COMPONENTS = [c for c in NODE_NAMES if "Splice" in c] AFFINE_COMPONENTS = [c for c in NODE_NAMES if "Affine" in c] -KNOWN_COMPONENTS = NODE_NAMES.keys() +KNOWN_COMPONENTS = list(NODE_NAMES.keys()) # End configuration section logger = logging.getLogger(__name__) diff --git a/egs/wsj/s5/steps/nnet3/dot/descriptor_parser.py b/egs/wsj/s5/steps/nnet3/dot/descriptor_parser.py index a46d144d0b6..ee6fa11b5c9 100644 --- a/egs/wsj/s5/steps/nnet3/dot/descriptor_parser.py +++ b/egs/wsj/s5/steps/nnet3/dot/descriptor_parser.py @@ -33,7 +33,7 @@ def ParseSubsegmentsAndArguments(segment_endpoints, sub_segments, arguments, inp else: arguments.append(sub_segment_name) else: - arguments = map(lambda x: re.sub(',','', x.strip()), input_string[segment_endpoints[0]:segment_endpoints[1]+1].split()) + arguments = [re.sub(',','', x.strip()) for x in input_string[segment_endpoints[0]:segment_endpoints[1]+1].split()] sub_segments = [] return sub_segments, arguments diff --git a/egs/wsj/s5/steps/nnet3/dot/nnet3_to_dot.py b/egs/wsj/s5/steps/nnet3/dot/nnet3_to_dot.py index f8cd357fa3b..4230b32aa7c 100755 --- a/egs/wsj/s5/steps/nnet3/dot/nnet3_to_dot.py +++ b/egs/wsj/s5/steps/nnet3/dot/nnet3_to_dot.py @@ -189,7 +189,7 @@ def ProcessSumDescriptor(segment, parent_node_name, affix, edge_attributes = Non sub_segment = segment['sub_segments'][i] part_name = "{0}{1}{2}".format(desc_name, sub_segment['name'], i) names.append("<{0}> part {1}".format(GetDotNodeName(part_name)['node'], i)) - dot_graph += DescriptorSegmentToDot(sub_segment, "{0}:{1}".format(desc_name, part_name), desc_name+"_"+str(i)) + dot_graph += DescriptorSegmentToDot(sub_segment, "{0}:{1}".format(desc_name, part_name), "{0}_{1}".format(desc_name, i)) # link the sum node parts to corresponding segments part_index = len(segment['sub_segments']) @@ -321,7 +321,7 @@ def Nnet3ComponentToDot(component_config, component_attributes = None): label = '' if component_attributes is None: component_attributes = component_config.keys() - attributes_to_print = set(component_attributes).intersection(component_config.keys()) + attributes_to_print = set(component_attributes).intersection(list(component_config.keys())) # process the known fields for key in attributes_to_print: if key in component_config: diff --git a/egs/wsj/s5/steps/nnet3/get_successful_models.py b/egs/wsj/s5/steps/nnet3/get_successful_models.py index 3661d91b8d5..e6dcf376a51 100755 --- a/egs/wsj/s5/steps/nnet3/get_successful_models.py +++ b/egs/wsj/s5/steps/nnet3/get_successful_models.py @@ -56,7 +56,7 @@ if (loss[max_index] - loss[i]) <= args.difference_threshold: accepted_models.append(i+1) - model_list = " ".join(map(lambda x: str(x), accepted_models)) + model_list = " ".join([str(x) for x in accepted_models]) print(model_list) if len(accepted_models) != args.num_models: diff --git a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py index b80a8d4045b..8a533465f07 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py @@ -181,7 +181,7 @@ def ParseSpliceString(splice_indexes, label_delay=None): splice_array = [] try: for i in range(len(split1)): - indexes = map(lambda x: int(x), split1[i].strip().split(",")) + indexes = [int(x) for x in split1[i].strip().split(",")] print(indexes) if len(indexes) < 1: raise 
ValueError("invalid --splice-indexes argument, too-short element: " @@ -214,12 +214,12 @@ def ParseLstmDelayString(lstm_delay): lstm_delay_array = [] try: for i in range(len(split1)): - indexes = map(lambda x: int(x), split1[i].strip().lstrip('[').rstrip(']').strip().split(",")) + indexes = [int(x) for x in split1[i].strip().lstrip('[').rstrip(']').strip().split(",")] if len(indexes) < 1: raise ValueError("invalid --lstm-delay argument, too-short element: " + lstm_delay) elif len(indexes) == 2 and indexes[0] * indexes[1] >= 0: - raise ValueError('Warning: ' + str(indexes) + ' is not a standard BLSTM mode. There should be a negative delay for the forward, and a postive delay for the backward.') + raise ValueError('Warning: {} is not a standard BLSTM mode. There should be a negative delay for the forward, and a postive delay for the backward.'.format(indexes)) if len(indexes) == 2 and indexes[0] > 0: # always a negative delay followed by a postive delay indexes[0], indexes[1] = indexes[1], indexes[0] lstm_delay_array.append(indexes) @@ -335,9 +335,9 @@ def ProcessSpliceIndexes(config_dir, splice_indexes, label_delay, num_lstm_layer # write the files used by other scripts like steps/nnet3/get_egs.sh f = open(config_dir + "/vars", "w") - print('model_left_context=' + str(left_context), file=f) - print('model_right_context=' + str(right_context), file=f) - print('num_hidden_layers=' + str(num_hidden_layers), file=f) + print('model_left_context={}'.format(left_context), file=f) + print('model_right_context={}'.format(right_context), file=f) + print('num_hidden_layers={}'.format(num_hidden_layers), file=f) # print('initial_right_context=' + str(splice_array[0][-1]), file=f) f.close() diff --git a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py index 162fda16d16..d121be6d899 100644 --- a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py +++ b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py @@ -98,21 +98,21 @@ input_dim = len(splice_array[0]) * args.feat_dim + args.ivector_dim f = open(args.config_dir + "/vars", "w") -print('left_context=' + str(left_context), file=f) -print('right_context=' + str(right_context), file=f) +print('left_context={}'.format(left_context), file=f) +print('right_context={}'.format(right_context), file=f) # the initial l/r contexts are actually not needed. # print('initial_left_context=' + str(splice_array[0][0]), file=f) # print('initial_right_context=' + str(splice_array[0][-1]), file=f) -print('num_hidden_layers=' + str(num_hidden_layers), file=f) +print('num_hidden_layers={}'.format(num_hidden_layers), file=f) f.close() f = open(args.config_dir + "/init.config", "w") print('# Config file for initializing neural network prior to', file=f) print('# preconditioning matrix computation', file=f) -print('input-node name=input dim=' + str(args.feat_dim), file=f) +print('input-node name=input dim={}'.format(args.feat_dim), file=f) list=[ ('Offset(input, {0})'.format(n) if n != 0 else 'input' ) for n in splice_array[0] ] if args.ivector_dim > 0: - print('input-node name=ivector dim=' + str(args.ivector_dim), file=f) + print('input-node name=ivector dim={}'.format(args.ivector_dim), file=f) list.append('ReplaceIndex(ivector, t, 0)') # example of next line: # output-node name=output input="Append(Offset(input, -3), Offset(input, -2), Offset(input, -1), ... 
, Offset(input, 3), ReplaceIndex(ivector, t, 0))" diff --git a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py index 54c65eb5403..a407869854d 100755 --- a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py +++ b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py @@ -40,7 +40,6 @@ """ -from __future__ import print_function import os, argparse, sys, random import logging import traceback @@ -163,7 +162,7 @@ def process_multilingual_egs(args): "not include any examples from this lang.") logger.info("The proportion of egs from lang {} is {:.2f}. The number of blocks " "per archive for this lang is approximately {:.2f}. " - "{}".format(lang, lang_to_num_examples[lang] / tot_num_egs, + "{}".format(lang, float(lang_to_num_examples[lang]) / tot_num_egs, blocks_per_archive_this_lang, warning)) @@ -173,11 +172,11 @@ def process_multilingual_egs(args): lang_to_num_remaining_egs = [n for n in lang_to_num_examples] for archive_index in range(num_archives + 1): # +1 is because we write to the last archive in two rounds num_remaining_archives = num_archives - archive_index - num_remaining_blocks = num_remaining_egs / args.block_size + num_remaining_blocks = float(num_remaining_egs) / args.block_size last_round = (archive_index == num_archives) if not last_round: - num_blocks_this_archive = int(round(num_remaining_blocks / num_remaining_archives)) + num_blocks_this_archive = int(round(float(num_remaining_blocks) / num_remaining_archives)) logger.info("Generating archive {} containing {} blocks...".format(archive_index, num_blocks_this_archive)) else: # This is the second round for the last archive. Flush all the remaining egs... archive_index = num_archives - 1 @@ -194,7 +193,7 @@ def process_multilingual_egs(args): for block_index in range(num_blocks_this_archive): # Find the lang with the highest proportion of remaining examples - remaining_proportions = [remain / tot for remain, tot in zip(lang_to_num_remaining_egs, lang_to_num_examples)] + remaining_proportions = [float(remain) / tot for remain, tot in zip(lang_to_num_remaining_egs, lang_to_num_examples)] lang_index, max_proportion = max(enumerate(remaining_proportions), key=lambda a: a[1]) # Read 'block_size' examples from the selected lang and write them to the current output scp file: diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py index 93cbc940c33..572e2cf08b7 100755 --- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py +++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py @@ -4,6 +4,7 @@ # 2016 Vimal Manohar # Apache 2.0. 
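A tiny sketch, with invented counts, of the block-allocation rule in allocate_multilingual_examples.py above: each block of egs is drawn from the language that still has the largest fraction of its examples unwritten (the ratio the patch wraps in float() so it remains a true division under Python 2).

    # Invented per-language totals and remaining example counts.
    lang_to_num_examples = [1000, 400, 200]
    lang_to_num_remaining_egs = [700, 390, 50]

    remaining_proportions = [float(remain) / tot for remain, tot in
                             zip(lang_to_num_remaining_egs, lang_to_num_examples)]
    lang_index, max_proportion = max(enumerate(remaining_proportions),
                                     key=lambda a: a[1])
    print(lang_index, max_proportion)  # 1 0.975 -> the next block comes from language 1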
+from __future__ import division import argparse import errno import logging @@ -97,7 +98,7 @@ def get_args(): g_plot_colors = ['red', 'blue', 'green', 'black', 'magenta', 'yellow', 'cyan'] -class LatexReport: +class LatexReport(object): """Class for writing a Latex report""" def __init__(self, pdf_file): @@ -422,7 +423,7 @@ def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None, f.write("\n".join(iter_stat_report)) f.close() if plot: - main_component_names = main_stat_tables.keys() + main_component_names = list(main_stat_tables.keys()) main_component_names.sort() plot_component_names = set(main_component_names) @@ -528,13 +529,13 @@ def generate_clipped_proportion_plots(exp_dir, output_dir, plot, file = open("{dir}/clipped_proportion.log".format(dir=output_dir), "w") iter_stat_report = "" for row in main_cp_stats: - iter_stat_report += "\t".join(map(lambda x: str(x), row)) + "\n" + iter_stat_report += "\t".join([str(x) for x in row]) + "\n" file.write(iter_stat_report) file.close() if plot: main_component_names = ( - stats_per_dir[exp_dir]['cp_per_iter_per_component'].keys()) + list(stats_per_dir[exp_dir]['cp_per_iter_per_component'].keys())) main_component_names.sort() plot_component_names = set(main_component_names) for dir in dirs: @@ -635,22 +636,21 @@ def generate_parameter_diff_plots(exp_dir, output_dir, plot, except KeyError: total_missing_iterations += 1 iter_data.append("NA") - if (total_missing_iterations/len(component_names) > 20 + if (float(total_missing_iterations)/len(component_names) > 20 and not gave_user_warning): logger.warning("There are more than {0} missing " "iterations per component. " "Something might be wrong.".format( - total_missing_iterations - / len(component_names))) + float(total_missing_iterations)/ len(component_names))) gave_user_warning = True f.write(" ".join(iter_data)+"\n") if plot: # get the component names - diff_type = key_file.keys()[0] - main_component_names = stats_per_dir[exp_dir][diff_type][ - 'progress_per_component'].keys() + diff_type = list(key_file.keys())[0] + main_component_names = list(stats_per_dir[exp_dir][diff_type][ + 'progress_per_component'].keys()) main_component_names.sort() plot_component_names = set(main_component_names) diff --git a/egs/wsj/s5/steps/nnet3/report/summarize_compute_debug_timing.py b/egs/wsj/s5/steps/nnet3/report/summarize_compute_debug_timing.py index 442ca4e35cf..5c74eaf128c 100755 --- a/egs/wsj/s5/steps/nnet3/report/summarize_compute_debug_timing.py +++ b/egs/wsj/s5/steps/nnet3/report/summarize_compute_debug_timing.py @@ -7,6 +7,7 @@ # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function +from __future__ import division import sys import re import argparse @@ -101,7 +102,7 @@ def Main(): total_time = sum(command_times.values()) sorted_commands = sorted(command_times.items(), key = lambda x: x[1], reverse = True) for item in sorted_commands: - print("{c} : time {t} : fraction {f}".format(c=item[0], t=item[1], f=item[1] / total_time)) + print("{c} : time {t} : fraction {f}".format(c=item[0], t=item[1], f=float(item[1]) / total_time)) if __name__ == "__main__": diff --git a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py index 5445b16e165..9e7e92f6768 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py @@ -4,6 +4,7 @@ # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function +from 
__future__ import division import os import argparse import shlex @@ -519,10 +520,10 @@ def MakeConfigs(config_dir, splice_indexes_string, # write the files used by other scripts like steps/nnet3/get_egs.sh f = open(config_dir + "/vars", "w") - print('model_left_context=' + str(left_context), file=f) - print('model_right_context=' + str(right_context), file=f) - print('num_hidden_layers=' + str(num_hidden_layers), file=f) - print('num_targets=' + str(num_targets), file=f) + print('model_left_context={}'.format(left_context), file=f) + print('model_right_context={}'.format(right_context), file=f) + print('num_hidden_layers={}'.format(num_hidden_layers), file=f) + print('num_targets={}'.format(num_targets), file=f) print('add_lda=' + ('true' if add_lda else 'false'), file=f) print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) print('objective_type=' + objective_type, file=f) diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 0c881b4dbdf..e72b29297a4 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -9,6 +9,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import logging import os @@ -193,7 +194,7 @@ def train(args, run_opts): shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: - f.write(str(num_jobs)) + f.write('{}'.format(num_jobs)) if args.input_model is None: config_dir = '{0}/configs'.format(args.dir) @@ -301,8 +302,7 @@ def train(args, run_opts): num_archives_expanded = num_archives * args.frames_per_eg num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 - num_iters = ((num_archives_to_process * 2) - / (args.num_jobs_initial + args.num_jobs_final)) + num_iters = int(num_archives_to_process * 2 / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index fc73cbc7f3f..ffccf443b99 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -9,6 +9,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import logging import pprint @@ -321,8 +322,7 @@ def train(args, run_opts): num_archives_expanded = num_archives * args.frames_per_eg num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 - num_iters = int((num_archives_to_process * 2) - / (args.num_jobs_initial + args.num_jobs_final)) + num_iters = int((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index e797c86b323..c704b0725d3 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -10,6 +10,7 @@ raw neural network instead of an acoustic model. """ from __future__ import print_function +from __future__ import division import argparse import logging import pprint @@ -368,8 +369,7 @@ def train(args, run_opts): # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
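The num_iters line that follows is why several of these training scripts now pair int() with the division: under Python 2 the old expression silently floored, while under Python 3 (or once division is imported from __future__) a bare / returns a float, which for instance can no longer be passed to range(). A quick check with assumed values (the numbers are illustrative only):

    from __future__ import division   # the semantics these scripts now opt into

    num_archives_to_process = 120
    num_jobs_initial, num_jobs_final = 2, 8

    # Same shape as the formula below: twice the archives to process, divided
    # by the average number of parallel jobs over the run.
    num_iters = int(num_archives_to_process * 2
                    / (num_jobs_initial + num_jobs_final))
    assert num_iters == 24
    assert isinstance(num_archives_to_process * 2 / 10, float)   # without int()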
num_archives_to_process = int(args.num_epochs * num_archives) num_archives_processed = 0 - num_iters = ((num_archives_to_process * 2) - / (args.num_jobs_initial + args.num_jobs_final)) + num_iters = int((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. @@ -509,7 +509,8 @@ def train(args, run_opts): run_opts=run_opts, chunk_width=args.chunk_width, get_raw_nnet_from_am=False, compute_per_dim_accuracy=args.compute_per_dim_accuracy, - max_objective_evaluations=args.max_objective_evaluations) + max_objective_evaluations=args.max_objective_evaluations, + use_multitask_egs=use_multitask_egs) else: common_lib.force_symlink("{0}.raw".format(num_iters), "{0}/final.raw".format(args.dir)) diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 25e7dced19b..ab2aa0c4d8d 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -8,6 +8,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import logging import os @@ -248,7 +249,7 @@ def train(args, run_opts): shutil.copy('{0}/tree'.format(args.ali_dir), args.dir) with open('{0}/num_jobs'.format(args.dir), 'w') as f: - f.write(str(num_jobs)) + f.write('{}'.format(num_jobs)) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) @@ -369,8 +370,7 @@ def train(args, run_opts): # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_to_process = int(args.num_epochs * num_archives) num_archives_processed = 0 - num_iters = ((num_archives_to_process * 2) - / (args.num_jobs_initial + args.num_jobs_final)) + num_iters = int((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) # If do_final_combination is True, compute the set of models_to_combine. # Otherwise, models_to_combine will be none. diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 3b8dc82fe48..f025eb5b343 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -115,7 +115,7 @@ def write_expanded_xconfig_files(config_dir, all_layers): '# See also ./xconfig.expanded.2\n', file=xconfig_file_out) for layer in all_layers: - print(str(layer), file=xconfig_file_out) + print('{}'.format(layer), file=xconfig_file_out) xconfig_file_out.close() try: @@ -135,7 +135,7 @@ def write_expanded_xconfig_files(config_dir, all_layers): for layer in all_layers: layer.normalize_descriptors() - print(str(layer), file=xconfig_file_out) + print('{}'.format(layer), file=xconfig_file_out) xconfig_file_out.close() diff --git a/egs/wsj/s5/steps/segmentation/internal/find_oov_phone.py b/egs/wsj/s5/steps/segmentation/internal/find_oov_phone.py index 3e9cbbbf178..038640f6271 100644 --- a/egs/wsj/s5/steps/segmentation/internal/find_oov_phone.py +++ b/egs/wsj/s5/steps/segmentation/internal/find_oov_phone.py @@ -8,6 +8,7 @@ /phones/align_lexicon.int. 
It prints the OOV phone to stdout, if it can find a single phone mapping for the OOV word.""" +from __future__ import print_function import sys diff --git a/egs/wsj/s5/steps/segmentation/internal/get_default_targets_for_out_of_segments.py b/egs/wsj/s5/steps/segmentation/internal/get_default_targets_for_out_of_segments.py index e7000b9de00..0361999d904 100755 --- a/egs/wsj/s5/steps/segmentation/internal/get_default_targets_for_out_of_segments.py +++ b/egs/wsj/s5/steps/segmentation/internal/get_default_targets_for_out_of_segments.py @@ -14,6 +14,7 @@ the application and data, this could be [ 0 0 0 ] or [ 0 0 1 ] or something with fractional weights. """ +from __future__ import division import argparse import logging @@ -131,7 +132,7 @@ def run(args): and np.shape(default_targets)[1] == 3) with common_lib.smart_open(args.out_targets_ark, 'w') as f: - for reco, utts in reco2utt.iteritems(): + for reco, utts in reco2utt.items(): reco_mat = np.repeat(default_targets, reco2num_frames[reco], axis=0) utts.sort(key=lambda x: segments[x][1]) # sort on start time diff --git a/egs/wsj/s5/steps/segmentation/internal/merge_segment_targets_to_recording.py b/egs/wsj/s5/steps/segmentation/internal/merge_segment_targets_to_recording.py index 8c53e5e8db9..e48afbeb872 100755 --- a/egs/wsj/s5/steps/segmentation/internal/merge_segment_targets_to_recording.py +++ b/egs/wsj/s5/steps/segmentation/internal/merge_segment_targets_to_recording.py @@ -9,6 +9,7 @@ in any of the segments are assigned the default targets vector, specified by the option --default-targets or [ 0 0 0 ] if unspecified. """ +from __future__ import division import argparse import logging @@ -158,7 +159,7 @@ def run(args): num_reco = 0 with common_lib.smart_open(args.out_targets_ark, 'w') as fh: - for reco, utts in reco2utt.iteritems(): + for reco, utts in reco2utt.items(): # Read a recording and the list of its utterances from the # reco2utt dictionary reco_mat = np.repeat(default_targets, reco2num_frames[reco], diff --git a/egs/wsj/s5/steps/segmentation/internal/merge_targets.py b/egs/wsj/s5/steps/segmentation/internal/merge_targets.py index 8222eddad8f..a14aef151c2 100755 --- a/egs/wsj/s5/steps/segmentation/internal/merge_targets.py +++ b/egs/wsj/s5/steps/segmentation/internal/merge_targets.py @@ -17,6 +17,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import logging import numpy as np diff --git a/egs/wsj/s5/steps/tfrnnlm/lstm.py b/egs/wsj/s5/steps/tfrnnlm/lstm.py index 5f175212c4b..433dc87b4c6 100644 --- a/egs/wsj/s5/steps/tfrnnlm/lstm.py +++ b/egs/wsj/s5/steps/tfrnnlm/lstm.py @@ -203,7 +203,7 @@ def attn_cell(): config.max_grad_norm) optimizer = tf.train.GradientDescentOptimizer(self._lr) self._train_op = optimizer.apply_gradients( - zip(grads, tvars), + list(zip(grads, tvars)), global_step=tf.contrib.framework.get_or_create_global_step()) self._new_lr = tf.placeholder( diff --git a/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py b/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py index 440962a3780..ff6c7263804 100644 --- a/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py +++ b/egs/wsj/s5/steps/tfrnnlm/lstm_fast.py @@ -218,7 +218,7 @@ def attn_cell(): config.max_grad_norm) optimizer = tf.train.GradientDescentOptimizer(self._lr) self._train_op = optimizer.apply_gradients( - zip(grads, tvars), + list(zip(grads, tvars)), global_step=tf.contrib.framework.get_or_create_global_step()) self._new_lr = tf.placeholder( diff --git a/egs/wsj/s5/steps/tfrnnlm/reader.py b/egs/wsj/s5/steps/tfrnnlm/reader.py index fc3d4d0471c..80cdeccbb26 
100644 --- a/egs/wsj/s5/steps/tfrnnlm/reader.py +++ b/egs/wsj/s5/steps/tfrnnlm/reader.py @@ -31,7 +31,7 @@ def _read_words(filename): def _build_vocab(filename): words = _read_words(filename) - word_to_id = dict(zip(words, range(len(words)))) + word_to_id = dict(list(zip(words, list(range(len(words)))))) return word_to_id diff --git a/egs/wsj/s5/steps/tfrnnlm/vanilla_rnnlm.py b/egs/wsj/s5/steps/tfrnnlm/vanilla_rnnlm.py index f3ce1a5c297..ae7a257906e 100644 --- a/egs/wsj/s5/steps/tfrnnlm/vanilla_rnnlm.py +++ b/egs/wsj/s5/steps/tfrnnlm/vanilla_rnnlm.py @@ -201,7 +201,7 @@ def attn_cell(): config.max_grad_norm) optimizer = tf.train.MomentumOptimizer(self._lr, 0.9) self._train_op = optimizer.apply_gradients( - zip(grads, tvars), + list(zip(grads, tvars)), global_step=tf.contrib.framework.get_or_create_global_step()) self._new_lr = tf.placeholder( diff --git a/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py b/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py index deb8207c5b7..61c9a3014aa 100755 --- a/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py +++ b/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py @@ -17,6 +17,7 @@ """ from __future__ import print_function +from __future__ import division import argparse import collections import logging @@ -231,7 +232,7 @@ def resolve_overlaps(ctms, segments): try: index = next( (i for i, line in enumerate(ctm_for_next_utt) - if line[2] + line[3] / 2.0 > overlap / 2.0)) + if line[2] + line[3] / 2.0 > overlap / 2.0)) except StopIteration: # This can happen if there is no word hypothesized after # half the overlap region. @@ -277,7 +278,7 @@ def run(args): segments, reco2utt = read_segments(args.segments) ctms = read_ctm(args.ctm_in, segments) - for reco, utts in reco2utt.iteritems(): + for reco, utts in reco2utt.items(): ctms_for_reco = [] for utt in sorted(utts, key=lambda x: segments[x][1]): if (reco, utt) in ctms: diff --git a/egs/wsj/s5/utils/data/get_uniform_subsegments.py b/egs/wsj/s5/utils/data/get_uniform_subsegments.py index c61b96e0dbb..cc3015564a5 100755 --- a/egs/wsj/s5/utils/data/get_uniform_subsegments.py +++ b/egs/wsj/s5/utils/data/get_uniform_subsegments.py @@ -4,6 +4,7 @@ # 2017 Matthew Maciejewski # Apache 2.0. +from __future__ import print_function import argparse import logging import sys diff --git a/egs/wsj/s5/utils/data/internal/choose_utts_to_combine.py b/egs/wsj/s5/utils/data/internal/choose_utts_to_combine.py index 740b9aa612b..875c238abd5 100755 --- a/egs/wsj/s5/utils/data/internal/choose_utts_to_combine.py +++ b/egs/wsj/s5/utils/data/internal/choose_utts_to_combine.py @@ -89,7 +89,7 @@ def CombineList(min_duration, durations): # for each utterance-index i, group_start[i] gives us the # start-index of the group of utterances of which it's currently # a member. - group_start = range(num_utts) + group_start = list(range(num_utts)) # if utterance-index i currently corresponds to the start of a group # of utterances, then group_durations[i] is the total duration of # that utterance-group, otherwise undefined. @@ -327,7 +327,7 @@ def GetUtteranceGroups(min_duration, spk2utt, utt2dur): utt_groups = GetUtteranceGroups(args.min_duration, spk2utt, utt2dur) # set utt_group names to an array like [ 'utt1', 'utt2-comb2', 'utt4', ... 
] -utt_group_names = [ group[0] if len(group)==1 else group[0] + "-comb" + str(len(group)) +utt_group_names = [ group[0] if len(group)==1 else "{0}-comb{1}".format(group[0], len(group)) for group in utt_groups ] diff --git a/egs/wsj/s5/utils/data/internal/perturb_volume.py b/egs/wsj/s5/utils/data/internal/perturb_volume.py index b3bd4225191..c1dfd936358 100755 --- a/egs/wsj/s5/utils/data/internal/perturb_volume.py +++ b/egs/wsj/s5/utils/data/internal/perturb_volume.py @@ -8,6 +8,7 @@ volume of the recordings and writes to stdout the contents of a new wav.scp file. """ +from __future__ import print_function import argparse import re diff --git a/egs/wsj/s5/utils/data/perturb_speed_to_allowed_lengths.py b/egs/wsj/s5/utils/data/perturb_speed_to_allowed_lengths.py index 7924fc4fcf1..ae16e63c945 100755 --- a/egs/wsj/s5/utils/data/perturb_speed_to_allowed_lengths.py +++ b/egs/wsj/s5/utils/data/perturb_speed_to_allowed_lengths.py @@ -60,7 +60,7 @@ def get_args(): args.speed_perturb = True if args.speed_perturb == 'true' else False return args -class Utterance: +class Utterance(object): """ This class represents a Kaldi utterance in a data directory like data/train """ @@ -321,7 +321,7 @@ def main(): "Coverage rate: {}%".format(start_dur, end_dur, 100.0 - args.coverage_factor * 2)) logger.info("There will be {} unique allowed lengths " - "for the utterances.".format(int(math.log(end_dur / start_dur) / + "for the utterances.".format(int(math.log(end_dur / start_dur)/ math.log(args.factor)))) allowed_durations = find_allowed_durations(start_dur, end_dur, args) diff --git a/egs/wsj/s5/utils/filt.py b/egs/wsj/s5/utils/filt.py index 2847c0034dd..9201d9e493f 100755 --- a/egs/wsj/s5/utils/filt.py +++ b/egs/wsj/s5/utils/filt.py @@ -2,6 +2,7 @@ # Apache 2.0 +from __future__ import print_function import sys vocab=set() @@ -11,4 +12,4 @@ with open(sys.argv[2]) as textfile: for line in textfile: - print " ".join(map(lambda word: word if word in vocab else '', line.strip().split())) + print(" ".join([word if word in vocab else '' for word in line.strip().split()])) diff --git a/egs/wsj/s5/utils/lang/bpe/learn_bpe.py b/egs/wsj/s5/utils/lang/bpe/learn_bpe.py index 70f18f2d1d9..f6c6d5a0ebb 100755 --- a/egs/wsj/s5/utils/lang/bpe/learn_bpe.py +++ b/egs/wsj/s5/utils/lang/bpe/learn_bpe.py @@ -13,6 +13,8 @@ """ from __future__ import unicode_literals +from __future__ import division +from __future__ import print_function import sys import codecs diff --git a/egs/wsj/s5/utils/lang/internal/arpa2fst_constrained.py b/egs/wsj/s5/utils/lang/internal/arpa2fst_constrained.py index 19acd311c3d..31dfd08fbd2 100755 --- a/egs/wsj/s5/utils/lang/internal/arpa2fst_constrained.py +++ b/egs/wsj/s5/utils/lang/internal/arpa2fst_constrained.py @@ -4,6 +4,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division import sys import argparse import math @@ -44,7 +45,7 @@ print(' '.join(sys.argv), file = sys.stderr) -class HistoryState: +class HistoryState(object): def __init__(self): # note: neither backoff_prob nor the floats # in word_to_prob are in log space. @@ -56,7 +57,7 @@ def __init__(self): self.word_to_prob = dict() -class ArpaModel: +class ArpaModel(object): def __init__(self): # self.orders is indexed by history-length [i.e. 
0 for unigram, # 1 for bigram and so on], and is then a dict indexed diff --git a/egs/wsj/s5/utils/lang/make_phone_lm.py b/egs/wsj/s5/utils/lang/make_phone_lm.py index 47d2a45d229..5cc9a8de832 100755 --- a/egs/wsj/s5/utils/lang/make_phone_lm.py +++ b/egs/wsj/s5/utils/lang/make_phone_lm.py @@ -4,6 +4,7 @@ # Apache 2.0. from __future__ import print_function +from __future__ import division import sys import argparse import math @@ -65,7 +66,7 @@ -class CountsForHistory: +class CountsForHistory(object): ## This class (which is more like a struct) stores the counts seen in a ## particular history-state. It is used inside class NgramCounts. ## It really does the job of a dict from int to float, but it also @@ -77,7 +78,7 @@ def __init__(self): self.total_count = 0 def Words(self): - return self.word_to_count.keys() + return list(self.word_to_count.keys()) def __str__(self): # e.g. returns ' total=12 3->4 4->6 -1->2' @@ -109,7 +110,7 @@ def AddCount(self, predicted_word, count): else: self.word_to_count[predicted_word] = new_count -class NgramCounts: +class NgramCounts(object): ## A note on data-structure. Firstly, all words are represented as ## integers. We store n-gram counts as an array, indexed by (history-length ## == n-gram order minus one) (note: python calls arrays "lists") of dicts @@ -187,7 +188,7 @@ def ApplyBackoff(self): # there will be no unigram. if args.verbose >= 1: initial_num_ngrams = self.GetNumNgrams() - for n in reversed(range(args.no_backoff_ngram_order, args.ngram_order)): + for n in reversed(list(range(args.no_backoff_ngram_order, args.ngram_order))): this_order_counts = self.counts[n] for hist, counts_for_hist in this_order_counts.items(): backoff_hist = hist[1:] @@ -276,8 +277,8 @@ def PruneEmptyStates(self): states_removed_per_hist_len = [ 0 ] * args.ngram_order - for n in reversed(range(args.no_backoff_ngram_order, - args.ngram_order)): + for n in reversed(list(range(args.no_backoff_ngram_order, + args.ngram_order))): num_states_removed = 0 for hist, counts_for_hist in self.counts[n].items(): l = len(counts_for_hist.word_to_count) @@ -304,14 +305,14 @@ def EnsureStructurallyNeededNgramsExist(self): # we have a unigram state]. if args.verbose >= 1: num_ngrams_initial = self.GetNumNgrams() - for n in reversed(range(args.no_backoff_ngram_order, - args.ngram_order)): + for n in reversed(list(range(args.no_backoff_ngram_order, + args.ngram_order))): for hist, counts_for_hist in self.counts[n].items(): # This loop ensures that if we have an n-gram like (6, 7, 8) -> 9, # then, say, (7, 8) -> 9 and (8) -> 9 exist. reduced_hist = hist - for m in reversed(range(args.no_backoff_ngram_order, n)): + for m in reversed(list(range(args.no_backoff_ngram_order, n))): reduced_hist = reduced_hist[1:] # shift an element off # the history. counts_for_backoff_hist = self.counts[m][reduced_hist] @@ -321,7 +322,7 @@ def EnsureStructurallyNeededNgramsExist(self): # then, say, (6, 7) -> 8 and (6) -> 7 exist. This will be needed # for FST representations of the ARPA LM. reduced_hist = hist - for m in reversed(range(args.no_backoff_ngram_order, n)): + for m in reversed(list(range(args.no_backoff_ngram_order, n))): this_word = reduced_hist[-1] reduced_hist = reduced_hist[:-1] # pop an element off the # history @@ -346,7 +347,7 @@ def PrintAsFst(self, word_disambig_symbol): # History will map from history (as a tuple) to integer FST-state. 
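The list(...) wrappers added throughout make_phone_lm.py here (and in generate_plots.py earlier) all address the same Python 3 change: dict.keys(), range(), zip() and map() return views or lazy iterators instead of lists, so code that indexes them, sorts them in place, or concatenates them with a list stops working. A standalone sketch of the failure modes being guarded against (values are illustrative only):

    word_to_count = {3: 4, 4: 6, -1: 2}

    keys = word_to_count.keys()          # a view on Python 3, a list on Python 2
    # keys[0]            -> TypeError on Python 3 (views are not subscriptable)
    # keys.sort()        -> AttributeError on Python 3
    # [1, 0] + range(2)  -> TypeError on Python 3 (cannot add list and range)

    first_key = list(word_to_count.keys())[0]      # portable across 2 and 3
    ordered_keys = sorted(word_to_count.keys())    # portable, no in-place sort
    orders = [1, 0] + list(range(2, 4))            # [1, 0, 2, 3]
    countdown = list(reversed(list(range(1, 4))))  # [3, 2, 1], mirrors the edits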
hist_to_state = self.GetHistToStateMap() - for n in [ 1, 0 ] + range(2, args.ngram_order): + for n in [ 1, 0 ] + list(range(2, args.ngram_order)): this_order_counts = self.counts[n] # For order 1, make sure the keys are sorted. keys = this_order_counts.keys() if n != 1 else sorted(this_order_counts.keys()) @@ -388,7 +389,7 @@ def GetProtectedNgrams(self): # add the backed-off n-grams (7, 8) -> 9 and (8) -> 9 to # 'protected-ngrams'. reduced_hist = hist - for m in reversed(range(args.no_backoff_ngram_order, n)): + for m in reversed(list(range(args.no_backoff_ngram_order, n))): reduced_hist = reduced_hist[1:] # shift an element off # the history. @@ -399,7 +400,7 @@ def GetProtectedNgrams(self): # history-state (6, 7, 8), then n-grams (6, 7, 8) and (6, 7) are # protected. This assures that the FST states are accessible. reduced_hist = hist - for m in reversed(range(args.no_backoff_ngram_order, n)): + for m in reversed(list(range(args.no_backoff_ngram_order, n))): ans.add(reduced_hist) reduced_hist = reduced_hist[:-1] # pop an element off the # history @@ -499,7 +500,7 @@ def PruningLogprobChange(self, count, discount, backoff_count, backoff_total): # and the 'count' term is zero in the numerator part of the log expression, # because symbol 'a' is completely backed off in 'this' state. this_a_change = augmented_count * \ - math.log((new_discount * new_backoff_count / new_backoff_total) / \ + math.log((new_discount * new_backoff_count / new_backoff_total)/ \ augmented_count) # other_a_change is the log-like change of symbol 'a' coming from all @@ -511,7 +512,7 @@ def PruningLogprobChange(self, count, discount, backoff_count, backoff_total): # doing so gives us an upper bound on the divergence. other_a_change = \ a_other_count * math.log((new_backoff_count / new_backoff_total) / \ - (backoff_count / backoff_total)) + (backoff_count / backoff_total)) # b_change is the log-like change of phantom symbol 'b' coming from # 'this' state (and note: it only comes from this state, that's how we diff --git a/egs/wsj/s5/utils/nnet/gen_dct_mat.py b/egs/wsj/s5/utils/nnet/gen_dct_mat.py index d0f043ad7a4..24139f1c9f8 100755 --- a/egs/wsj/s5/utils/nnet/gen_dct_mat.py +++ b/egs/wsj/s5/utils/nnet/gen_dct_mat.py @@ -20,12 +20,20 @@ # and takes into account that data-layout is along frequency axis, # while DCT is done along temporal axis. 
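gen_dct_mat.py, whose hunk follows, writes out a sparse matrix that applies a temporal DCT to spliced features, leaving the frequency layout untouched as the header comment says. The value it emits for basis k and time offset n is sqrt(2/T) * cos(pi/T * k * (n + 0.5)) with T the temporal context; assuming M_PI in that script is simply pi, a small standalone check of the formula (dimensions chosen arbitrarily):

    from __future__ import print_function, division
    from math import sqrt, cos, pi

    time_context = 5    # e.g. splicing 2 frames on either side of the centre frame
    dct_basis = 3       # number of DCT bases kept

    def dct_entry(k, n, T):
        # Same expression as the one printed per (k, n) pair in the hunk below.
        return sqrt(2.0 / T) * cos(pi / T * k * (n + 0.5))

    for k in range(dct_basis):
        print([round(dct_entry(k, n, time_context), 3) for n in range(time_context)])
    # k = 0 gives a constant row (about 0.632); higher k give oscillating rows.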
+from __future__ import division +from __future__ import print_function from math import * import sys from optparse import OptionParser +def print_on_same_line(text): + # print_function is imported above, so this single form works on Python 2 and 3 + print(text, end=' ') + parser = OptionParser() parser.add_option('--fea-dim', dest='dim', help='feature dimension') parser.add_option('--splice', dest='splice', help='applied splice value') @@ -49,19 +57,19 @@ #generate sparse DCT matrix -print '[' +print('[') for k in range(dct_basis): for m in range(dim): for n in range(timeContext): - if(n==0): - print m*'0 ', - else: - print (dim-1)*'0 ', - print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))), + if(n==0): + print_on_same_line(m*'0 ') + else: + print_on_same_line((dim-1)*'0 ') + print_on_same_line(str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5)))) if(n==timeContext-1): - print (dim-m-1)*'0 ', - print - print + print_on_same_line((dim-m-1)*'0 ') + print() + print() -print ']' +print(']') diff --git a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py index a4262a8cffd..d7e9d9b7493 100755 --- a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py +++ b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py @@ -18,12 +18,20 @@ # ./gen_hamm_mat.py # script generates diagonal matrix with hamming window values +from __future__ import division +from __future__ import print_function from math import * import sys from optparse import OptionParser +def print_on_same_line(text): + # print_function is imported above, so this single form works on Python 2 and 3 + print(text, end=' ') + parser = OptionParser() parser.add_option('--fea-dim', dest='dim', help='feature dimension') parser.add_option('--splice', dest='splice', help='applied splice value') @@ -42,16 +50,16 @@ dim_mat=(2*splice+1)*dim timeContext=2*splice+1 -print '[' +print('[') for row in range(dim_mat): for col in range(dim_mat): if col!=row: - print '0', + print_on_same_line('0') else: i=int(row/dim) - print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))), - print + print_on_same_line(str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1)))) + print() -print ']' +print(']') diff --git a/egs/wsj/s5/utils/nnet/gen_splice.py b/egs/wsj/s5/utils/nnet/gen_splice.py index 0241aeed6ba..3fe76513df6 100755 --- a/egs/wsj/s5/utils/nnet/gen_splice.py +++ b/egs/wsj/s5/utils/nnet/gen_splice.py @@ -18,12 +18,19 @@ # ./gen_splice.py # generates Component +from __future__ import print_function from math import * import sys from optparse import OptionParser +def print_on_same_line(text): + # print_function is imported above, so this single form works on Python 2 and 3 + print(text, end=' ') + parser = OptionParser() parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') @@ -40,12 +47,12 @@ dim_out=(2*splice+1)*dim_in -print '', dim_out, dim_in -print '[', +print(' {0} {1}'.format(dim_out, dim_in)) +print_on_same_line('[') -splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step) +splice_vec = list(range(-splice*splice_step, splice*splice_step+1, splice_step)) for idx in range(len(splice_vec)): - print splice_vec[idx], + print_on_same_line(splice_vec[idx]) -print ']' +print(']') diff --git a/egs/wsj/s5/utils/nnet/make_blstm_proto.py b/egs/wsj/s5/utils/nnet/make_blstm_proto.py index 6e540ec791a..4d269cfdef0 100755 --- a/egs/wsj/s5/utils/nnet/make_blstm_proto.py +++ b/egs/wsj/s5/utils/nnet/make_blstm_proto.py @@ -17,6 +17,7 @@ # Generated Nnet prototype, to be initialized by
'nnet-initialize'. +from __future__ import print_function import sys ### @@ -54,7 +55,7 @@ parser.print_help() sys.exit(1) -(feat_dim, num_leaves) = map(int,args); +(feat_dim, num_leaves) = [int(i) for i in args]; # Original prototype from Jiayu, # @@ -77,18 +78,18 @@ # The BLSTM layers, if o.num_layers == 1: # Single BLSTM, - print " %d %d %s" % (feat_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts + print(" %d %d %s" % (feat_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts) else: # >1 BLSTM, - print " %d %d %s" % (feat_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts + print(" %d %d %s" % (feat_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts) for l in range(o.num_layers - 2): - print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts - print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts + print(" %d %d %s" % (2*o.proj_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts) + print(" %d %d %s" % (2*o.proj_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts) # Adding for more stability, -print " %d %d" % (2*o.proj_dim_last, 2*o.proj_dim_last) +print(" %d %d" % (2*o.proj_dim_last, 2*o.proj_dim_last)) # Softmax layer, -print " %d %d 0.0 0.0" % (2*o.proj_dim_last, num_leaves) + softmax_affine_opts -print " %d %d" % (num_leaves, num_leaves) +print(" %d %d 0.0 0.0" % (2*o.proj_dim_last, num_leaves) + softmax_affine_opts) +print(" %d %d" % (num_leaves, num_leaves)) diff --git a/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py b/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py index 73455563b51..172660da825 100755 --- a/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py +++ b/egs/wsj/s5/utils/nnet/make_cnn2d_proto.py @@ -17,6 +17,8 @@ # Generated Nnet prototype, to be initialized by 'nnet-initialize'. +from __future__ import division +from __future__ import print_function import math, random, sys, warnings from optparse import OptionParser @@ -139,8 +141,8 @@ assert( (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) % o.cnn1_filt_x_step == 0 ) # subsample1 -cnn1_out_fmap_y_len=((1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step)) -cnn1_out_fmap_x_len=((1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step)) +cnn1_out_fmap_y_len=(1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step) +cnn1_out_fmap_x_len=(1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step) # fix filt_len and filt_step def fix_filt_step(inp_len, filt_len, filt_step): @@ -149,7 +151,7 @@ def fix_filt_step(inp_len, filt_len, filt_step): return filt_step else: # filt_step <= filt_len - for filt_step in xrange(filt_len, 0, -1): + for filt_step in range(filt_len, 0, -1): if ((inp_len - filt_len) % filt_step == 0): return filt_step @@ -167,29 +169,29 @@ def fix_filt_step(inp_len, filt_len, filt_step): ### # Begin the prototype -print "" +print("") # Convolutional part of network '''1st CNN layer''' cnn1_input_dim=feat_raw_dim * (o.delta_order+1) * (o.splice*2+1) -cnn1_out_fmap_x_len=((1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step)) -cnn1_out_fmap_y_len=((1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step)) +cnn1_out_fmap_x_len=(1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step) +cnn1_out_fmap_y_len=(1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step) cnn1_output_dim=o.cnn1_num_filters * cnn1_out_fmap_x_len * cnn1_out_fmap_y_len '''1st Pooling layer''' pool1_input_dim=cnn1_output_dim pool1_fmap_x_len=cnn1_out_fmap_x_len -pool1_out_fmap_x_len=((1 + (pool1_fmap_x_len - o.pool1_x_len) / 
o.pool1_x_step)) +pool1_out_fmap_x_len=(1 + (pool1_fmap_x_len - o.pool1_x_len) / o.pool1_x_step) pool1_fmap_y_len=cnn1_out_fmap_y_len -pool1_out_fmap_y_len=((1 + (pool1_fmap_y_len - o.pool1_y_len) / o.pool1_y_step)) +pool1_out_fmap_y_len=(1 + (pool1_fmap_y_len - o.pool1_y_len) / o.pool1_y_step) pool1_output_dim=o.cnn1_num_filters*pool1_out_fmap_x_len*pool1_out_fmap_y_len '''2nd CNN layer''' cnn2_input_dim=pool1_output_dim cnn2_fmap_x_len=pool1_out_fmap_x_len -cnn2_out_fmap_x_len=((1 + (cnn2_fmap_x_len - o.cnn2_filt_x_len) / o.cnn2_filt_x_step)) +cnn2_out_fmap_x_len=(1 + (cnn2_fmap_x_len - o.cnn2_filt_x_len) / o.cnn2_filt_x_step) cnn2_fmap_y_len=pool1_out_fmap_y_len -cnn2_out_fmap_y_len=((1 + (cnn2_fmap_y_len - o.cnn2_filt_y_len) / o.cnn2_filt_y_step)) +cnn2_out_fmap_y_len=(1 + (cnn2_fmap_y_len - o.cnn2_filt_y_len) / o.cnn2_filt_y_step) cnn2_output_dim=o.cnn2_num_filters * cnn2_out_fmap_x_len * cnn2_out_fmap_y_len @@ -242,14 +244,14 @@ def fix_filt_step(inp_len, filt_len, filt_step): vector += '%d:1:%d ' % (i, i + feat_raw_dim - 1) for i in range(feat_raw_dim+1, (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), feat_raw_dim + o.pitch_dim): vector += '%d:1:%d ' % (i, i + o.pitch_dim - 1) - print ' %d %d %s ' % \ - ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), vector) - print ' %d %d %s %s ' % \ - ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), o.num_pitch_neurons + cnn2_output_dim, '%s/nnet.proto.convolution' % o.dirct, '%s/nnet.proto.pitch' % o.dirct) + print(' %d %d %s ' % \ + ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), vector)) + print(' %d %d %s %s ' % \ + ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), o.num_pitch_neurons + cnn2_output_dim, '%s/nnet.proto.convolution' % o.dirct, '%s/nnet.proto.pitch' % o.dirct)) num_convolution_output = o.num_pitch_neurons + cnn2_output_dim else: # no pitch - print convolution_proto + print(convolution_proto) # We are done! sys.exit(0) diff --git a/egs/wsj/s5/utils/nnet/make_cnn_proto.py b/egs/wsj/s5/utils/nnet/make_cnn_proto.py index c6aa519ea96..4d8b9ca2946 100755 --- a/egs/wsj/s5/utils/nnet/make_cnn_proto.py +++ b/egs/wsj/s5/utils/nnet/make_cnn_proto.py @@ -17,6 +17,8 @@ # Generated Nnet prototype, to be initialized by 'nnet-initialize'. +from __future__ import division +from __future__ import print_function import math, random, sys from optparse import OptionParser @@ -88,7 +90,7 @@ ### # Begin the prototype -print "" +print("") # Convolutional part of network num_patch1 = 1 + (feat_raw_dim - o.patch_dim1) / o.patch_step1 @@ -150,13 +152,13 @@ vector += '%d:1:%d ' % (i, i + feat_raw_dim - 1) for i in range(feat_raw_dim+1, inputdim_of_cnn + 1, feat_raw_dim + o.pitch_dim): vector += '%d:1:%d ' % (i, i + o.pitch_dim - 1) - print ' %d %d %s ' % \ - (inputdim_of_cnn, inputdim_of_cnn, vector) - print ' %d %d %s %s ' % \ - (inputdim_of_cnn, o.num_pitch_neurons + outputdim_of_cnn, '%s/nnet.proto.convolution' % o.protodir, '%s/nnet.proto.pitch' % o.protodir) + print(' %d %d %s ' % \ + (inputdim_of_cnn, inputdim_of_cnn, vector)) + print(' %d %d %s %s ' % \ + (inputdim_of_cnn, o.num_pitch_neurons + outputdim_of_cnn, '%s/nnet.proto.convolution' % o.protodir, '%s/nnet.proto.pitch' % o.protodir)) else: # no pitch - print convolution_proto + print(convolution_proto) # We are done! 
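The feature-map bookkeeping in make_cnn_proto.py and make_cnn2d_proto.py above reduces to the usual valid-convolution length, out_len = 1 + (in_len - filt_len) / filt_step, which is an integer only when filt_step divides (in_len - filt_len) exactly; that is what the asserts and the fix_filt_step() helper are protecting. A standalone illustration with assumed dimensions (the numbers are not taken from any recipe):

    from __future__ import print_function, division

    def out_fmap_len(in_len, filt_len, filt_step):
        # Valid convolution: patches start at 0, filt_step, ..., in_len - filt_len.
        assert (in_len - filt_len) % filt_step == 0
        return 1 + (in_len - filt_len) // filt_step   # kept integral here

    def fix_filt_step(inp_len, filt_len, filt_step):
        # Same idea as the helper in the scripts: if the requested step does not
        # tile the input exactly, fall back to the largest step not exceeding the
        # filter length that does.
        if (inp_len - filt_len) % filt_step == 0:
            return filt_step
        for step in range(filt_len, 0, -1):
            if (inp_len - filt_len) % step == 0:
                return step

    print(out_fmap_len(40, 8, 4))    # 9 patches along a 40-dim axis
    print(fix_filt_step(40, 7, 4))   # 33 is not divisible by 4, falls back to 3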
sys.exit(0) diff --git a/egs/wsj/s5/utils/nnet/make_lstm_proto.py b/egs/wsj/s5/utils/nnet/make_lstm_proto.py index a2da0a194fc..6818c860ed0 100755 --- a/egs/wsj/s5/utils/nnet/make_lstm_proto.py +++ b/egs/wsj/s5/utils/nnet/make_lstm_proto.py @@ -17,6 +17,7 @@ # Generated Nnet prototype, to be initialized by 'nnet-initialize'. +from __future__ import print_function import sys ### @@ -52,7 +53,7 @@ parser.print_help() sys.exit(1) -(feat_dim, num_leaves) = map(int,args); +(feat_dim, num_leaves) = [int(i) for i in args]; # Original prototype from Jiayu, # @@ -73,14 +74,14 @@ if None != o.param_stddev: softmax_affine_opts += " %f " % o.param_stddev # The LSTM layers, -print " %d %d %s" % (feat_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts +print(" %d %d %s" % (feat_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts) for l in range(o.num_layers - 1): - print " %d %d %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts + print(" %d %d %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts) # Adding for more stability, -print " %d %d" % (o.proj_dim, o.proj_dim) +print(" %d %d" % (o.proj_dim, o.proj_dim)) # Softmax layer, -print " %d %d 0.0 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts -print " %d %d" % (num_leaves, num_leaves) +print(" %d %d 0.0 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts) +print(" %d %d" % (num_leaves, num_leaves)) diff --git a/egs/wsj/s5/utils/nnet/make_nnet_proto.py b/egs/wsj/s5/utils/nnet/make_nnet_proto.py index 99198cbe44b..4f60be6c9d0 100755 --- a/egs/wsj/s5/utils/nnet/make_nnet_proto.py +++ b/egs/wsj/s5/utils/nnet/make_nnet_proto.py @@ -17,6 +17,8 @@ # Generated Nnet prototype, to be initialized by 'nnet-initialize'. +from __future__ import division +from __future__ import print_function import math, random, sys, re ### @@ -87,7 +89,7 @@ o.affine_opts = o.affine_opts.replace("_"," ") o.dropout_opts = o.dropout_opts.replace("_"," ") -(feat_dim, num_leaves, num_hid_layers, num_hid_neurons) = map(int,args); +(feat_dim, num_leaves, num_hid_layers, num_hid_neurons) = [int(i) for i in args]; ### End parse options @@ -120,46 +122,46 @@ def Glorot(dim1, dim2): assert(num_hid_layers == 0) if o.bottleneck_trick: # 25% smaller stddev -> small bottleneck range, 10x smaller learning rate - print " %d %d %f %f" % \ + print(" %d %d %f %f" % \ (feat_dim, o.bottleneck_dim, \ - (o.param_stddev_factor * Glorot(feat_dim, o.bottleneck_dim) * 0.75 ), 0.1) + (o.param_stddev_factor * Glorot(feat_dim, o.bottleneck_dim) * 0.75 ), 0.1)) # 25% smaller stddev -> smaller gradient in prev. 
layer, 10x smaller learning rate for weigts & biases - print " %d %d %f %f %f %f %f %f" % \ + print(" %d %d %f %f %f %f %f %f" % \ (o.bottleneck_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ - (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons) * 0.75 ), 0.1, 0.1, o.max_norm) + (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons) * 0.75 ), 0.1, 0.1, o.max_norm)) else: - print " %d %d %f" % \ + print(" %d %d %f" % \ (feat_dim, o.bottleneck_dim, \ - (o.param_stddev_factor * Glorot(feat_dim, o.bottleneck_dim))) - print " %d %d %f %f %f %f" % \ + (o.param_stddev_factor * Glorot(feat_dim, o.bottleneck_dim)))) + print(" %d %d %f %f %f %f" % \ (o.bottleneck_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ - (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons)), o.max_norm) - print "%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts) # Non-linearity + (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons)), o.max_norm)) + print("%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts)) # Non-linearity # Last AffineTransform (10x smaller learning rate on bias) - print " %d %d %f %f %f %f %f" % \ + print(" %d %d %f %f %f %f %f" % \ (num_hid_neurons, num_leaves, 0.0, 0.0, \ - (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)), 1.0, 0.1) + (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)), 1.0, 0.1)) # Optionaly append softmax if o.with_softmax: if o.block_softmax_dims == "": - print " %d %d" % (num_leaves, num_leaves) + print(" %d %d" % (num_leaves, num_leaves)) else: - print " %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims) - print "" + print(" %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims)) + print("") # We are done! sys.exit(0) # NO HIDDEN LAYERS! # Add only last layer (logistic regression) if num_hid_layers == 0: - print " %d %d %f %f %f" % \ - (feat_dim, num_leaves, 0.0, 0.0, (o.param_stddev_factor * Glorot(feat_dim, num_leaves))) + print(" %d %d %f %f %f" % \ + (feat_dim, num_leaves, 0.0, 0.0, (o.param_stddev_factor * Glorot(feat_dim, num_leaves)))) if o.with_softmax: if o.block_softmax_dims == "": - print " %d %d" % (num_leaves, num_leaves) + print(" %d %d" % (num_leaves, num_leaves)) else: - print " %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims) - print "" + print(" %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims)) + print("") # We are done! sys.exit(0) @@ -170,63 +172,63 @@ def Glorot(dim1, dim2): # Begin the prototype, # First AffineTranform, -print " %d %d %f %f %f %f %s" % \ +print(" %d %d %f %f %f %f %s" % \ (feat_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ (o.param_stddev_factor * Glorot(feat_dim, num_hid_neurons) * \ - (math.sqrt(1.0/12.0) if o.smaller_input_weights else 1.0)), o.max_norm, o.affine_opts) + (math.sqrt(1.0/12.0) if o.smaller_input_weights else 1.0)), o.max_norm, o.affine_opts)) # Note.: compensating dynamic range mismatch between input features and Sigmoid-hidden layers, # i.e. mapping the std-dev of N(0,1) (input features) to std-dev of U[0,1] (sigmoid-outputs). # This is done by multiplying with stddev(U[0,1]) = sqrt(1/12). 
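The sqrt(1/12) in this note is simply the standard deviation of a U[0,1] variable (uniform variance is (b - a)^2 / 12), so multiplying the first layer's weight stddev by it maps roughly unit-variance input features onto the dynamic range of sigmoid outputs; the 0.29 mentioned in the next comment line is the same constant rounded. A quick numeric check, with a placeholder standing in for the script's Glorot() term:

    import math

    stddev_uniform_0_1 = math.sqrt(1.0 / 12.0)
    print(round(stddev_uniform_0_1, 4))    # 0.2887

    # Shape of the first-AffineTransform stddev above: the extra factor is only
    # applied when --smaller-input-weights is set.
    param_stddev_factor = 1.0     # illustrative value
    glorot_term = 0.1             # placeholder, not Kaldi's actual Glorot() output
    smaller_input_weights = True
    first_layer_stddev = (param_stddev_factor * glorot_term *
                          (stddev_uniform_0_1 if smaller_input_weights else 1.0))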
# The stddev of weights is consequently reduced with scale 0.29, -print "%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts) +print("%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts)) if o.with_dropout: - print " %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts) + print(" %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts)) # Internal AffineTransforms, for i in range(num_hid_layers-1): - print " %d %d %f %f %f %f %s" % \ + print(" %d %d %f %f %f %f %s" % \ (num_hid_neurons, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ - (o.param_stddev_factor * Glorot(num_hid_neurons, num_hid_neurons)), o.max_norm, o.affine_opts) - print "%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts) + (o.param_stddev_factor * Glorot(num_hid_neurons, num_hid_neurons)), o.max_norm, o.affine_opts)) + print("%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts)) if o.with_dropout: - print " %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts) + print(" %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts)) # Optionaly add bottleneck, if o.bottleneck_dim != 0: assert(o.bottleneck_dim > 0) if o.bottleneck_trick: # 25% smaller stddev -> small bottleneck range, 10x smaller learning rate - print " %d %d %f %f" % \ + print(" %d %d %f %f" % \ (num_hid_neurons, o.bottleneck_dim, \ - (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim) * 0.75 ), 0.1) + (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim) * 0.75 ), 0.1)) # 25% smaller stddev -> smaller gradient in prev. layer, 10x smaller learning rate for weigts & biases - print " %d %d %f %f %f %f %f %f %s" % \ + print(" %d %d %f %f %f %f %f %f %s" % \ (o.bottleneck_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ - (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons) * 0.75 ), 0.1, 0.1, o.max_norm, o.affine_opts) + (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons) * 0.75 ), 0.1, 0.1, o.max_norm, o.affine_opts)) else: # Same learninig-rate and stddev-formula everywhere, - print " %d %d %f" % \ + print(" %d %d %f" % \ (num_hid_neurons, o.bottleneck_dim, \ - (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim))) - print " %d %d %f %f %f %f %s" % \ + (o.param_stddev_factor * Glorot(num_hid_neurons, o.bottleneck_dim)))) + print(" %d %d %f %f %f %f %s" % \ (o.bottleneck_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \ - (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons)), o.max_norm, o.affine_opts) - print "%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts) + (o.param_stddev_factor * Glorot(o.bottleneck_dim, num_hid_neurons)), o.max_norm, o.affine_opts)) + print("%s %d %d %s" % (o.activation_type, num_hid_neurons, num_hid_neurons, o.activation_opts)) if o.with_dropout: - print " %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts) + print(" %d %d %s" % (num_hid_neurons, num_hid_neurons, o.dropout_opts)) # Last AffineTransform (10x smaller learning rate on bias) -print " %d %d %f %f %f %f %f" % \ +print(" %d %d %f %f %f %f %f" % \ (num_hid_neurons, num_leaves, 0.0, 0.0, \ - (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)), 1.0, 0.1) + (o.param_stddev_factor * Glorot(num_hid_neurons, num_leaves)), 1.0, 0.1)) # Optionaly append softmax if o.with_softmax: if o.block_softmax_dims == "": - print " %d %d" % (num_leaves, num_leaves) 
+ print(" %d %d" % (num_leaves, num_leaves)) else: - print " %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims) + print(" %d %d %s" % (num_leaves, num_leaves, o.block_softmax_dims)) # We are done! sys.exit(0) diff --git a/egs/wsj/s5/utils/prepare_lang.sh b/egs/wsj/s5/utils/prepare_lang.sh index 6439a136288..7c018fd94f9 100755 --- a/egs/wsj/s5/utils/prepare_lang.sh +++ b/egs/wsj/s5/utils/prepare_lang.sh @@ -108,6 +108,11 @@ srcdir=$1 oov_word=$2 tmpdir=$3 dir=$4 + + +if [ -d $dir/phones ]; then + rm -r $dir/phones +fi mkdir -p $dir $tmpdir $dir/phones silprob=false @@ -213,7 +218,6 @@ else paste -d' ' $tmpdir/phones $tmpdir/phones > $tmpdir/phone_map.txt fi -mkdir -p $dir/phones # various sets of phones... # Sets of phones for use in clustering, and making monophone systems. diff --git a/egs/wsj/s5/utils/reverse_arpa.py b/egs/wsj/s5/utils/reverse_arpa.py index 5437aec4341..e154a6e0813 100755 --- a/egs/wsj/s5/utils/reverse_arpa.py +++ b/egs/wsj/s5/utils/reverse_arpa.py @@ -2,11 +2,12 @@ # -*- coding: utf-8 -*- # Copyright 2012 Mirko Hannemann BUT, mirko.hannemann@gmail.com +from __future__ import print_function import sys import codecs # for UTF-8/unicode if len(sys.argv) != 2: - print 'usage: reverse_arpa arpa.in' + print('usage: reverse_arpa arpa.in') sys.exit() arpaname = sys.argv[1] @@ -34,13 +35,13 @@ try: file = codecs.open(arpaname, "r", "utf-8") except IOError: - print 'file not found: ' + arpaname + print('file not found: ' + arpaname) sys.exit() text=file.readline() while (text and text[:6] != "\\data\\"): text=file.readline() if not text: - print "invalid ARPA file" + print("invalid ARPA file") sys.exit() #print text, while (text and text[:5] != "ngram"): text=file.readline() @@ -54,7 +55,7 @@ r = ind[0].split() read_n = int(r[1].strip()) if read_n != n+1: - print "invalid ARPA file:", text + print("invalid ARPA file: {}".format(text)) sys.exit() n = read_n cngrams.append(counts) @@ -68,7 +69,7 @@ for n in range(1,len(cngrams)+1): # unigrams, bigrams, trigrams while (text and "-grams:" not in text): text=file.readline() if n != int(text[1]): - print "invalid ARPA file:", text + print("invalid ARPA file:{}".format(text)) sys.exit() #print text,cngrams[n-1] this_ngrams={} # stores all read ngrams @@ -115,7 +116,7 @@ while (text and text[:5] != "\\end\\"): text=file.readline() if not text: - print "invalid ARPA file" + print("invalid ARPA file") sys.exit() file.close() #print text, @@ -133,14 +134,13 @@ #p(ABCD)+b(ABCD)-p(BCD)+p(ABC)-p(BC)+p(AB)-p(B)+p(A) DCBA 0 # compute new reversed ARPA model -print "\\data\\" +print("\\data\\") for n in range(1,len(cngrams)+1): # unigrams, bigrams, trigrams - print "ngram "+str(n)+"="+str(len(ngrams[n-1].keys())) + print("ngram {0} = {1}".format(n, len(ngrams[n-1].keys()))) offset = 0.0 for n in range(1,len(cngrams)+1): # unigrams, bigrams, trigrams - print "\\"+str(n)+"-grams:" - keys = ngrams[n-1].keys() - keys.sort() + print("\\{}-grams:".format(n)) + keys = sorted(ngrams[n-1].keys()) for ngram in keys: prob = ngrams[n-1][ngram] # reverse word order @@ -179,10 +179,10 @@ elif n == 2: revprob = revprob + offset # add weight to bigrams starting with if (prob[1] != inf): # only backoff weights from not newly created ngrams - print revprob,rev_ngram.encode("utf-8"),back + print(revprob,rev_ngram.encode("utf-8"),back) else: - print revprob,rev_ngram.encode("utf-8"),"-100000.0" + print(revprob,rev_ngram.encode("utf-8"),"-100000.0") else: # highest order - no backoff weights if (n==2) and (rev_ngram[:3] == ""): revprob = revprob + offset - 
print revprob,rev_ngram.encode("utf-8") -print "\\end\\" + print(revprob,rev_ngram.encode("utf-8")) +print("\\end\\") diff --git a/egs/yomdle_fa/v1/local/create_line_image_from_page_image.py b/egs/yomdle_fa/v1/local/create_line_image_from_page_image.py index 77a6791d5d7..7135bb1b242 100755 --- a/egs/yomdle_fa/v1/local/create_line_image_from_page_image.py +++ b/egs/yomdle_fa/v1/local/create_line_image_from_page_image.py @@ -110,7 +110,7 @@ def bounding_area(index, hull): return {'area': len_p * len_o, 'length_parallel': len_p, 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'rectangle_center': (min_p + float(len_p) / 2, min_o + float(len_o) / 2), 'unit_vector': unit_vector_p, } @@ -275,8 +275,8 @@ def get_center(im): ------- (int, int): center of the image """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 + center_x = float(im.size[0]) / 2 + center_y = float(im.size[1]) / 2 return int(center_x), int(center_y) diff --git a/egs/yomdle_fa/v1/local/gedi2csv.py b/egs/yomdle_fa/v1/local/gedi2csv.py index 43a07421dd1..0b80c2e80bb 100755 --- a/egs/yomdle_fa/v1/local/gedi2csv.py +++ b/egs/yomdle_fa/v1/local/gedi2csv.py @@ -55,7 +55,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): """ Initialize the extractor""" def __init__(self, logger, args): diff --git a/egs/yomdle_fa/v1/local/yomdle2csv.py b/egs/yomdle_fa/v1/local/yomdle2csv.py index 3641de90324..8f208e2d968 100755 --- a/egs/yomdle_fa/v1/local/yomdle2csv.py +++ b/egs/yomdle_fa/v1/local/yomdle2csv.py @@ -55,7 +55,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): """ Initialize the extractor""" def __init__(self, logger, args): diff --git a/egs/yomdle_korean/README.txt b/egs/yomdle_korean/README.txt new file mode 100644 index 00000000000..3bf4cc8cd2d --- /dev/null +++ b/egs/yomdle_korean/README.txt @@ -0,0 +1,3 @@ +This directory contains example scripts for OCR on the Yomdle and Slam datasets. +Training is done on the Yomdle dataset and testing is done on Slam. +LM rescoring is also done with extra corpus data obtained from various sources diff --git a/egs/yomdle_korean/v1/cmd.sh b/egs/yomdle_korean/v1/cmd.sh new file mode 100755 index 00000000000..3d69546dfe8 --- /dev/null +++ b/egs/yomdle_korean/v1/cmd.sh @@ -0,0 +1,12 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+export cmd="queue.pl" diff --git a/egs/yomdle_korean/v1/image b/egs/yomdle_korean/v1/image new file mode 120000 index 00000000000..1668ee99922 --- /dev/null +++ b/egs/yomdle_korean/v1/image @@ -0,0 +1 @@ +../../cifar/v1/image/ \ No newline at end of file diff --git a/egs/yomdle_korean/v1/local/augment_data.sh b/egs/yomdle_korean/v1/local/augment_data.sh new file mode 100755 index 00000000000..136bfd24eb2 --- /dev/null +++ b/egs/yomdle_korean/v1/local/augment_data.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +verticle_shift=0 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 + +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp" + +for set in aug1; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --vertical-shift $verticle_shift \ + --fliplr false --augment 'random_scale' $datadir/augmentations/$set +done + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1 +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/yomdle_korean/v1/local/chain/compare_wer.sh b/egs/yomdle_korean/v1/local/chain/compare_wer.sh new file mode 100755 index 00000000000..80f31e0f311 --- /dev/null +++ b/egs/yomdle_korean/v1/local/chain/compare_wer.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b} + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +if [ $# == 0 ]; then + echo "Usage: $0: [ ... ]" + echo "e.g.: $0 exp/chain/cnn{1a,1b}" + exit 1 +fi + +echo "# $0 $*" +used_epochs=false + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +echo -n "# WER " +for x in $*; do + wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) " +for x in $*; do + wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER " +for x in $*; do + cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) " +for x in $*; do + cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/yomdle_korean/v1/local/chain/run_cnn_e2eali.sh b/egs/yomdle_korean/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..fcf59f917c1 --- /dev/null +++ b/egs/yomdle_korean/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1b.sh \ No newline at end of file diff --git a/egs/yomdle_korean/v1/local/chain/run_e2e_cnn.sh b/egs/yomdle_korean/v1/local/chain/run_e2e_cnn.sh new file mode 100755 index 00000000000..cea60a221a1 --- /dev/null +++ b/egs/yomdle_korean/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# score_basic score_nomalized +# WER 13.64 10.6 +# WER (rescored) 13.13 10.2 +# CER 2.99 3.0 +# CER (rescored) 2.88 2.9 +# Final train prob 0.0113 +# Final valid prob 0.0152 +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a +# exp/chain/e2e_cnn_1a: num-iters=48 nj=5..8 num-params=3.0M dim=40->352 combine=0.047->0.047 (over 2) logprob:train/valid[31,47,final]=(0.002,0.008,0.011/0.008,0.013,0.015) + +set -e +# configs for 'chain' +stage=0 +nj=30 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train +lang_decode=data/lang +decode_e2e=true +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + --shared-phones true \ + --type mono \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.apply-deriv-weights true \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 3 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 5 \ + --trainer.optimization.num-jobs-final 8 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh 
b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh new file mode 100755 index 00000000000..c43d7c669c1 --- /dev/null +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -0,0 +1,236 @@ +#!/bin/bash + +# e2eali_1a is the same as 1a but uses the e2e chain model to get the +# lattice alignments and to build a tree + +# local/chain/compare_wer.sh exp/old/chain/cnn_e2eali_1a/ +# System cnn_e2eali_1a +# WER 15.68 +# CER 3.18 +# Final train prob -0.0331 +# Final valid prob -0.0395 + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a/ +# exp/old/chain/cnn_e2eali_1a/: num-iters=33 nj=3..16 num-params=5.2M dim=40->456 combine=-0.035->-0.035 (over 1) xent:train/valid[21,32,final]=(-0.226,-0.175,-0.169/-0.248,-0.202,-0.195) logprob:train/valid[21,32,final]=(-0.039,-0.034,-0.033/-0.046,-0.040,-0.039) + +# Normalize scoring +# WER = 11.7 +# CER = 3.3 + +set -e -o pipefail +stage=0 +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +tdnn_dim=450 +# training options +srand=0 +remove_egs=false +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +decode_chain=false +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
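+    # (The chain egs and the denominator FST are built from this tree, so a
+    # stale tree would silently mismatch them; remove $tree_dir by hand if
+    # you really want to rebuild it, e.g. after changing num_leaves.)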
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=900" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ] && $decode_chain; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ] && $decode_chain; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --beam 12 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 + + echo "Done. Date: $(date). Results:" + local/chain/compare_wer.sh $dir +fi diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh new file mode 100755 index 00000000000..8fca9235f46 --- /dev/null +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +# e2eali_1b is the same as e2eali_1a but has fewer CNN layers, smaller +# l2-regularize, more epochs and uses dropout. 
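+
+# The dropout schedule '0,0@0.20,0.2@0.50,0' used below is interpreted by the
+# nnet3 training code as a piecewise-linear function of the fraction of data
+# seen so far: dropout stays at 0 for the first 20% of training, rises to 0.2
+# at 50%, and decays back to 0 by the end.
+# Illustrative invocation (only --stage/--train_stage shown):
+#   local/chain/tuning/run_cnn_e2eali_1b.sh --stage 4 --train_stage -10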
+ +#local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b/ +# System cnn_e2eali_1b +# score_basic score_nomalized +# WER 13.01 10.0 +# WER (rescored) 12.69 9.6 +# CER 2.78 3.0 +# CER (rescored) 2.70 2.8 +# Final train prob -0.0568 +# Final valid prob -0.0410 +#steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b +#exp/chain/cnn_e2eali_1b: num-iters=67 nj=3..16 num-params=5.2M dim=40->464 combine=-0.052->-0.052 (over 1) xent:train/valid[43,66,final]=(-0.379,-0.319,-0.304/-0.291,-0.234,-0.227) logprob:train/valid[43,66,final]=(-0.069,-0.058,-0.057/-0.046,-0.041,-0.041) +set -e -o pipefail +stage=0 +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=1000 +# we don't need extra left/right context for TDNN systems. +tdnn_dim=550 +# training options +srand=0 +remove_egs=false +lang_decode=data/lang +decode_chain=true +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
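+    # (Remove $tree_dir manually if you really intend to rebuild the tree,
+    # e.g. after changing num_leaves; the egs and den.fst below depend on it.)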
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=90" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-8,-4,0,4,8) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=900" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=16 \ + --trainer.frames-per-iter=2000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi diff --git a/egs/yomdle_korean/v1/local/check_tools.sh b/egs/yomdle_korean/v1/local/check_tools.sh new file mode 100755 index 00000000000..5b4d3107d3b --- /dev/null +++ b/egs/yomdle_korean/v1/local/check_tools.sh @@ -0,0 +1,43 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh +set +e + +command -v python3 >&/dev/null \ + || { echo >&2 "python3 not found on PATH. You will have to install Python3, preferably >= 3.6"; exit 1; } + +python3 -c "import numpy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs numpy installed." + exit 1 +fi + +python3 -c "import scipy" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy installed." + exit 1 +fi + +python3 -c "import scipy.misc; scipy.misc.__dict__['imread']" +if [ $? -ne 0 ] ; then + echo >&2 "This recipe needs scipy-image and Pillow installed." + exit 1 +fi + + +exit 0 diff --git a/egs/yomdle_korean/v1/local/extract_features.sh b/egs/yomdle_korean/v1/local/extract_features.sh new file mode 100755 index 00000000000..3880ebad3e8 --- /dev/null +++ b/egs/yomdle_korean/v1/local/extract_features.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment='no_aug' +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. 
./path.sh +. ./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + image/ocr/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --feat-dim $feat_dim --fliplr $fliplr --augment_type $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/yomdle_korean/v1/local/normalize_data.py b/egs/yomdle_korean/v1/local/normalize_data.py new file mode 100755 index 00000000000..fba3e762789 --- /dev/null +++ b/egs/yomdle_korean/v1/local/normalize_data.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Hossein Hadian + +# Apache 2.0 +# This script converts a BPE-encoded text to normal text. It is used in scoring + +import sys, io +import string +import unicodedata +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +for line in infile: + words = line.strip().split() + uttid = words[0] + transcript = ' '.join(words[1:]) + text_normalized = unicodedata.normalize('NFC', transcript) + output.write(uttid + ' ' + text_normalized + '\n') diff --git a/egs/yomdle_korean/v1/local/prepare_dict.sh b/egs/yomdle_korean/v1/local/prepare_dict.sh new file mode 100755 index 00000000000..22db5ae834d --- /dev/null +++ b/egs/yomdle_korean/v1/local/prepare_dict.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Copyright 2017 Hossein Hadian +# 2017 Babak Rekabdar +# 2017 Chun Chieh Chang +# 2017 Ashish Arora + +# This script prepares the dictionary. + +set -e +dir=data/local/dict +. ./utils/parse_options.sh || exit 1; + +mkdir -p $dir + +local/prepare_lexicon.py $dir + +cut -d' ' -f2- $dir/lexicon.txt | sed 's/SIL//g' | tr ' ' '\n' | sort -u | sed '/^$/d' >$dir/nonsilence_phones.txt || exit 1; + +echo ' SIL' >> $dir/lexicon.txt + +echo SIL > $dir/silence_phones.txt + +echo SIL >$dir/optional_silence.txt + +echo -n "" >$dir/extra_questions.txt diff --git a/egs/yomdle_korean/v1/local/prepare_lexicon.py b/egs/yomdle_korean/v1/local/prepare_lexicon.py new file mode 100755 index 00000000000..ec8d43d8335 --- /dev/null +++ b/egs/yomdle_korean/v1/local/prepare_lexicon.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Babak Rekabdar +# 2017 Hossein Hadian +# 2017 Chun Chieh Chang +# 2017 Ashish Arora +# Apache 2.0 + +# This script prepares lexicon for BPE. It gets the set of all words that occur in data/train/text. +# Since this lexicon is based on BPE, it replaces '|' with silence. 
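+#
+# Illustrative example (plain ASCII stands in for the NFD-decomposed Hangul
+# jamo that real entries contain): the BPE word "ab|c" in data/train/text
+# would produce the lexicon.txt line
+#   ab|c a b SIL c
+# i.e. each character becomes a "phone", '|' maps to SIL and '#' is dropped.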
+ +import argparse +import os +import unicodedata +parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""") +parser.add_argument('dir', type=str, help='output path') +args = parser.parse_args() + +### main ### +lex = {} +text_path = os.path.join('data', 'train', 'text') +with open(text_path, 'r', encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + for i in range(1, len(line_vect)): + char_normalized = unicodedata.normalize('NFD', line_vect[i]).replace('\n', '') + characters = list(char_normalized) + characters = " ".join([ 'SIL' if char == '|' else char for char in characters]) + characters = list(characters) + characters = "".join([ '' if char == '#' else char for char in characters]) + lex[line_vect[i]] = characters + +with open(os.path.join(args.dir, 'lexicon.txt'), 'w', encoding='utf-8') as fp: + for key in sorted(lex): + fp.write(key + " " + lex[key] + "\n") diff --git a/egs/yomdle_korean/v1/local/process_corpus.py b/egs/yomdle_korean/v1/local/process_corpus.py new file mode 100755 index 00000000000..b39030270b7 --- /dev/null +++ b/egs/yomdle_korean/v1/local/process_corpus.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# Copyright 2018 Ashish Arora +# Apache 2.0 +# This script reads valid phones and removes the lines in the corpus +# which have any other phone. + +import os +import sys, io + +phone_file = os.path.join('data/local/text/cleaned/phones.txt') +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +phone_dict = dict() +with open(phone_file, 'r', encoding='utf-8') as phone_fh: + for line in phone_fh: + line = line.strip().split()[0] + phone_dict[line] = line + +phone_dict[' '] = ' ' +corpus_text = list() +for line in infile: + text = line.strip() + skip_text = False + for phone in text: + if phone not in phone_dict.keys(): + skip_text = True + break + if not skip_text: + output.write(text+ '\n') + diff --git a/egs/yomdle_korean/v1/local/process_data.py b/egs/yomdle_korean/v1/local/process_data.py new file mode 100755 index 00000000000..d7546b0a803 --- /dev/null +++ b/egs/yomdle_korean/v1/local/process_data.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# 2018 Chun Chieh Chang + +""" This script reads the extracted Tamil OCR (yomdle and slam) database files + and creates the following files (for the data subset selected via --dataset): + text, utt2spk, images.scp. + Eg. local/process_data.py data/download/ data/local/splits/train.txt data/train + + Eg. 
text file: english_phone_books_0001_1 To sum up, then, it would appear that + utt2spk file: english_phone_books_0001_0 english_phone_books_0001 + images.scp file: english_phone_books_0001_0 \ + data/download/truth_line_image/english_phone_books_0001_0.png +""" + +import argparse +import os +import sys +import csv +import itertools +import unicodedata +import re +import string +import unicodedata +parser = argparse.ArgumentParser(description="Creates text, utt2spk, and images.scp files") +parser.add_argument('database_path', type=str, help='Path to data') +parser.add_argument('data_split', type=str, help='Path to file that contain datasplits') +parser.add_argument('out_dir', type=str, help='directory to output files') +args = parser.parse_args() + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +with open(args.data_split) as f: + for line in f: + line = line.strip() + image_id = line + image_filename = image_id + '.png' + image_filepath = os.path.join(args.database_path, 'truth_line_image', image_filename) + if not os.path.isfile (image_filepath): + print("File does not exist {}".format(image_filepath)) + continue + line_id = int(line.split('_')[-1]) + csv_filename = '_'.join(line.split('_')[:-1]) + '.csv' + csv_filepath = os.path.join(args.database_path, 'truth_csv', csv_filename) + csv_file = open(csv_filepath, 'r', encoding='utf-8') + for row in csv.reader(csv_file): + if row[1] == image_filename: + text = row[11] + text_vect = text.split() # this is to avoid non-utf-8 spaces + text = " ".join(text_vect) + #text_normalized = unicodedata.normalize('NFD', text).replace('\n', '') + if not text: + continue + text_fh.write(image_id + ' ' + text + '\n') + utt2spk_fh.write(image_id + ' ' + '_'.join(line.split('_')[:-1]) + '\n') + image_fh.write(image_id + ' ' + image_filepath + '\n') diff --git a/egs/yomdle_korean/v1/local/score.sh b/egs/yomdle_korean/v1/local/score.sh new file mode 100755 index 00000000000..31564d25326 --- /dev/null +++ b/egs/yomdle_korean/v1/local/score.sh @@ -0,0 +1,5 @@ +#!/bin/bash + + +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh new file mode 100755 index 00000000000..654880fcf59 --- /dev/null +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh @@ -0,0 +1,327 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# 2018 Ashish Arora +# Apache 2.0 +# This script is semi-supervised recipe with 25k line images of supervised data +# and 22k line images of unsupervised data with naive splitting. +# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI", +# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018 +# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf +# local/semisup/run_semisup.sh shows how to call this. + +# We use 3-gram LM trained on 5M lines of auxilary data. +# This script uses the same tree as that for the seed model. 
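+#
+# Rough flow of the stages below: lattices from decoding the unsupervised
+# line images with the seed chain system ($sup_chain_dir/decode_train_unsup)
+# act as supervision, best-path posteriors give per-frame derivative weights,
+# egs are generated for both the supervised and unsupervised sets, combined
+# with steps/nnet3/chain/multilingual/combine_egs.sh, and a new model is
+# trained on the combined egs.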
+# Unsupervised set: train_unsup (25k tamil line images) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices +# output-0 and output-1 are for superivsed and unsupervised data respectively. + +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b/ exp/semisup_100k/chain/tdnn_semisup_1a/ +# System cnn_e2eali_1b tdnn_semisup_1a +# WER 15.06 13.83 +# CER 3.15 2.83 +# Final train prob -0.0343 0.6103-0.0360 +# Final valid prob -0.0403 0.6054-0.0418 + +# steps/info/chain_dir_info.pl exp/semisup_100k/chain/tdnn_semisup_1a/ +# exp/semisup_100k/chain/tdnn_semisup_1a/: num-iters=58 nj=6..16 num-params=3.7M dim=40->456 combine=0.240->0.240 (over 1) + +# Normalize scoring +#WER = 10.4 +#CER = 2.9 + +set -u -e -o pipefail + +stage=0 # Start from -1 for supervised seed system training +train_stage=-100 +nj=30 +test_nj=30 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup_1a # affix for semi-supervised chain system + +# Datasets-Expects supervised_set and unsupervised_set +supervised_set=train +unsupervised_set=train_unsup + +# Input seed system +sup_chain_dir=exp/chain/cnn_e2eali_1b # supervised chain system +sup_lat_dir=exp/chain/e2e_train_lats # Seed model options +sup_tree_dir=exp/chain/tree_e2e # tree directory for supervised chain system + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation +# Neural network opts +xent_regularize=0.1 +tdnn_dim=450 +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +dir=$exp_root/chain$chain_affix/tdnn$tdnn_affix +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts + + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + output name=output-0 input=output.affine + output name=output-1 input=output.affine + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +# Get values for $model_left_context, $model_right_context +. $dir/configs/vars + +left_context=$model_left_context +right_context=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_$supervised_set + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. 
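+    # ('.nodelete' is a marker telling clean-up tooling not to remove these
+    # egs; they are expensive to generate and can be reused on a rerun via
+    # the --sup-egs-dir / --unsup-egs-dir options of this script.)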
+ + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$cmd" \ + --left-tolerance 3 --right-tolerance 3 \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --frames-overlap-per-eg 0 --constrained false \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 2000000 \ + --cmvn-opts "$cmvn_opts" \ + --generate-egs-scp true \ + data/${supervised_set} $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=340,300,200,100 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=6.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=3 # frame-tolerance for chain training + +unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_$unsupervised_set + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 2000000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_$unsupervised_set/weights.scp \ + --generate-egs-scp true $unsup_egs_opts \ + data/$unsupervised_set $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$cmd" \ + --block-size 64 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. 
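+  # (Stages of steps/nnet3/chain/train.py below -4 are setup steps such as
+  # denominator-FST creation; they are skipped here because the den.fst for
+  # this directory already exists.)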
+ train_stage=-4 +fi + +chunk_width=340,300,200,100 +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --egs.chunk-width=$chunk_width \ + --cmd "$cmd" \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00001 \ + --chain.apply-deriv-weights=true \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=900" \ + --trainer.srand=0 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --trainer.frames-per-iter=2000000 \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs 5 \ + --trainer.optimization.num-jobs-initial 6 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs false \ + --feat-dir data/$supervised_set \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph +fi + +if [ $stage -le 18 ]; then + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --beam 12 --frames-per-chunk 340 --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi +exit 0; + diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh new file mode 100755 index 00000000000..eb688151665 --- /dev/null +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh @@ -0,0 +1,325 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# 2018 Ashish Arora +# Apache 2.0 +# This script is semi-supervised recipe with 25k line images of supervised data +# and 22k line images of unsupervised data with naive splitting. +# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI", +# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018 +# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf +# local/semisup/run_semisup.sh shows how to call this. + +# We use 3-gram LM trained on 5M lines of auxilary data. +# This script uses the same tree as that for the seed model. +# Unsupervised set: train_unsup (25k tamil line images) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices +# output-0 and output-1 are for superivsed and unsupervised data respectively. 
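+#
+# Differences from _1a: wider TDNN layers (tdnn_dim=550 vs 450), dropout on
+# the CNN/TDNN layers following dropout_schedule='0,0@0.20,0.2@0.50,0', and
+# more training epochs (16 vs 5).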
+ +# local/chain/compare_wer.sh exp/semisup_100k/chain/tdnn_semisup_1b/ +# System tdnn_semisup_1b +# score_basic score_normalized +# WER 13.73 10.2 +# WER (rescored) 12.80 9.4 +# CER 2.78 2.8 +# CER (rescored) 2.57 2.7 +# Final train prob 0.6138-0.0337 +# Final valid prob 0.6115-0.0399 + +# steps/info/chain_dir_info.pl exp/semisup_100k/chain/tdnn_semisup_1b/ +# exp/semisup_100k/chain/tdnn_semisup_1b/: num-iters=46 nj=6..16 num-params=5.7M dim=40->456 combine=0.239->0.239 (over 1) + +set -u -e -o pipefail +stage=0 # Start from -1 for supervised seed system training +train_stage=-100 +nj=30 +test_nj=30 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix} +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup_1b # affix for semi-supervised chain system + +# Datasets-Expects supervised_set and unsupervised_set +supervised_set=train +unsupervised_set=train_unsup + +# Input seed system +sup_chain_dir=exp/chain/cnn_e2eali_1b # supervised chain system +sup_lat_dir=exp/chain/e2e_train_lats # Seed model options +sup_tree_dir=exp/chain/tree_e2e # tree directory for supervised chain system + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation +# Neural network opts +xent_regularize=0.1 +tdnn_dim=550 +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +dropout_schedule='0,0@0.20,0.2@0.50,0' +dir=$exp_root/chain$chain_affix/tdnn$tdnn_affix +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=40 name=input + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-dropout-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts + + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + output name=output-0 input=output.affine + output name=output-1 input=output.affine + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +# Get values for $model_left_context, $model_right_context +. $dir/configs/vars + +left_context=$model_left_context +right_context=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_$supervised_set + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. 
+ + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$cmd" \ + --left-tolerance 3 --right-tolerance 3 \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --frames-overlap-per-eg 0 --constrained false \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 2000000 \ + --cmvn-opts "$cmvn_opts" \ + --generate-egs-scp true \ + data/${supervised_set} $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=340,300,200,100 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=6.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=3 # frame-tolerance for chain training + +unsup_lat_dir=$sup_chain_dir/decode_$unsupervised_set +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_$unsupervised_set + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 2000000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_$unsupervised_set/weights.scp \ + --generate-egs-scp true $unsup_egs_opts \ + data/$unsupervised_set $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$cmd" \ + --block-size 64 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. 
+ train_stage=-4 +fi + +chunk_width=340,300,200,100 +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --egs.chunk-width=$chunk_width \ + --cmd "$cmd" \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00001 \ + --chain.apply-deriv-weights=true \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=900" \ + --trainer.srand=0 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --trainer.frames-per-iter=2000000 \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs 16 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.num-jobs-initial 6 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs false \ + --feat-dir data/$supervised_set \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; + +fi + +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph +fi + +if [ $stage -le 18 ]; then + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --beam 12 --frames-per-chunk 340 --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi +exit 0; + diff --git a/egs/yomdle_korean/v1/local/semisup/process_data.py b/egs/yomdle_korean/v1/local/semisup/process_data.py new file mode 100755 index 00000000000..94ad770ec2d --- /dev/null +++ b/egs/yomdle_korean/v1/local/semisup/process_data.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# 2018 Chun Chieh Chang + +""" This script reads the slam boxed Tamil OCR dataset and creates the following + files utt2spk, images.scp. Since boxed data do not have transcripts, it do not + creates text file. It is created as a separate script, because the data that + local/process_data.py is processing contains some empty transcripts which + should be removed or it will create bug while applying BPE. + + Eg. local/semisup/process_data.py data/download/ data/local/splits/train_unsup.txt + data/train_unsup + + Eg. 
utt2spk file: english_phone_books_0001_0 english_phone_books_0001 + images.scp file: english_phone_books_0001_0 \ + data/download/truth_line_image/english_phone_books_0001_0.png +""" +import argparse +import os +import sys +import csv +import itertools +import unicodedata +import re +import string +parser = argparse.ArgumentParser(description="Creates text, utt2spk, and images.scp files") +parser.add_argument('database_path', type=str, help='Path to data') +parser.add_argument('data_split', type=str, help='Path to file that contain datasplits') +parser.add_argument('out_dir', type=str, help='directory to output files') +args = parser.parse_args() + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') + +with open(args.data_split) as f: + for line in f: + line = line.strip() + image_id = line + image_filename = image_id + '.png' + image_filepath = os.path.join(args.database_path, 'truth_line_image', image_filename) + if not os.path.isfile (image_filepath): + print("File does not exist {}".format(image_filepath)) + continue + line_id = int(line.split('_')[-1]) + csv_filename = '_'.join(line.split('_')[:-1]) + '.csv' + csv_filepath = os.path.join(args.database_path, 'truth_csv', csv_filename) + csv_file = open(csv_filepath, 'r', encoding='utf-8') + for row in csv.reader(csv_file): + if row[1] == image_filename: + text = 'semisup' + text_fh.write(image_id + ' ' + text + '\n') + utt2spk_fh.write(image_id + ' ' + '_'.join(line.split('_')[:-1]) + '\n') + image_fh.write(image_id + ' ' + image_filepath + '\n') diff --git a/egs/yomdle_korean/v1/local/semisup/run_semisup.sh b/egs/yomdle_korean/v1/local/semisup/run_semisup.sh new file mode 100755 index 00000000000..5e20f50c99e --- /dev/null +++ b/egs/yomdle_korean/v1/local/semisup/run_semisup.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# 2018 Ashish Arora +# Apache 2.0 + +# This script demonstrates semi-supervised training using 25k line images of +# supervised data and 22k line images of unsupervised data. +# We assume the supervised data is in data/train and unsupervised data +# is in data/train_unsup. +# For LM training, we use 5 million lines of tamil text. + +set -e +set -o pipefail +stage=0 +nj=30 +exp_root=exp/semisup_56k +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +mkdir -p data/train_unsup/data +if [ $stage -le 0 ]; then + echo "stage 0: Processing train unsupervised data...$(date)" + local/semisup/process_data.py data/download/ \ + data/local/splits/train_unsup.txt \ + data/train_unsup + image/fix_data_dir.sh data/train_unsup +fi + +if [ $stage -le 1 ]; then + echo "stage 1: Obtaining image groups. calling get_image2num_frames..." + image/get_image2num_frames.py --feat-dim 40 data/train_unsup + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train_unsup + echo "Extracting features and calling compute_cmvn_stats: $(date) " + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train_unsup + steps/compute_cmvn_stats.sh data/train_unsup || exit 1; + image/fix_data_dir.sh data/train_unsup +fi + +for f in data/train/utt2spk data/train_unsup/utt2spk \ + data/train/text; do + if [ ! 
-f $f ]; then + echo "$0: Could not find $f" + exit 1; + fi +done + +# Prepare semi-supervised train set +if [ $stage -le 1 ]; then + utils/combine_data.sh data/semisup100k_250k \ + data/train data/train_unsup || exit 1 +fi + +############################################################################### +# Semi-supervised training using 25k line images supervised data and +# 22k hours unsupervised data. We use tree, lattices +# and seed chain system from the previous stage. +############################################################################### +if [ $stage -le 2 ]; then + local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh \ + --supervised-set train \ + --unsupervised-set train_unsup \ + --sup-chain-dir exp/chain/cnn_e2eali_1b_ep16_7cnn \ + --sup-lat-dir exp/chain/e2e_train_lats \ + --sup-tree-dir exp/chain/tree_e2e \ + --chain-affix "" \ + --tdnn-affix _semisup_ep16_7cnn \ + --stage 15 --train_stage 9 \ + --exp-root $exp_root || exit 1 +fi diff --git a/egs/yomdle_korean/v1/local/train_lm.sh b/egs/yomdle_korean/v1/local/train_lm.sh new file mode 100755 index 00000000000..c73c42fb7dc --- /dev/null +++ b/egs/yomdle_korean/v1/local/train_lm.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 +# +# This script trains a LM on the training transcriptions and corpus text. +# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +dir=data/local/local_lm +order=6 +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +lm_dir=${dir}/data + + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. + else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from output log of train_lm.py. +# These example numbers of metaparameters is for 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. +#bypass_metaparam_optim_opt="--bypass-metaparameter-optimization=0.031,0.860,0.678,0.194,0.037,0.006,0.928,0.712,0.454,0.220,0.926,0.844,0.749,0.358,0.966,0.879,0.783,0.544,0.966,0.826,0.674,0.450" +# Note: to use these example parameters, you may need to remove the .done files +# to make sure the make_lm_dir.py be called and tain only 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done + +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + + cat data/local/text/cleaned/bpe_val.txt > ${dir}/data/text/dev.txt + # use the training data as an additional data source. + # we can later fold the dev data into this. 
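+  # (Each line of data/train/text is "<utt-id> <transcript>"; the cut below
+  # drops the utterance id so that only the transcripts feed the LM.)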
+ cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + cat data/local/text/cleaned/bpe_corpus.txt > ${dir}/data/text/corpus_text.txt + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt + + # get the wordlist from train and corpus text + cat ${dir}/data/text/{train,corpus_text}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use. + # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='train=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz +fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 10 million n-grams for a big LM for rescoring purposes. + size=10000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + #[perplexity = 22.0613098868] over 151116.0 words + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi + +if [ $stage -le 3 ]; then + echo "$0: pruning the LM (to smaller size)" + # Using 2 million n-grams for a smaller LM for graph building. Prune from the + # bigger-pruned LM, it'll be faster. + size=2000000 + prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' + #[perplexity = 23.4801171202] over 151116.0 words + format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz +fi diff --git a/egs/yomdle_korean/v1/local/wer_output_filter b/egs/yomdle_korean/v1/local/wer_output_filter new file mode 100755 index 00000000000..59e364e0231 --- /dev/null +++ b/egs/yomdle_korean/v1/local/wer_output_filter @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Hossein Hadian + +# Apache 2.0 +# This script converts a BPE-encoded text to normal text. 
It is used in scoring + +import sys, io +import string +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') +for line in infile: + words = line.strip().split() + uttid = words[0] + transcript = ''.join(words[1:]) + transcript = transcript.replace('|', ' ') + output.write(uttid + ' ' + transcript + '\n') diff --git a/egs/yomdle_korean/v1/local/yomdle b/egs/yomdle_korean/v1/local/yomdle new file mode 120000 index 00000000000..2c4544c1399 --- /dev/null +++ b/egs/yomdle_korean/v1/local/yomdle @@ -0,0 +1 @@ +../../../yomdle_tamil/v1/local/yomdle/ \ No newline at end of file diff --git a/egs/yomdle_korean/v1/path.sh b/egs/yomdle_korean/v1/path.sh new file mode 100755 index 00000000000..2d17b17a84a --- /dev/null +++ b/egs/yomdle_korean/v1/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/yomdle_korean/v1/run_end2end.sh b/egs/yomdle_korean/v1/run_end2end.sh new file mode 100755 index 00000000000..65f5beb4b08 --- /dev/null +++ b/egs/yomdle_korean/v1/run_end2end.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +# Copyright 2018 Hossein Hadian +# Ashish Arora +# Jonathan Chang +# Apache 2.0 + +set -e +stage=0 +nj=30 + +language_main=Korean +slam_dir=/export/corpora5/slam/SLAM/ +yomdle_dir=/export/corpora5/slam/YOMDLE/ +corpus_dir=/export/corpora5/handwriting_ocr/corpus_data/ko/ +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +./local/check_tools.sh +# Start from stage=-2 for data preparation. This stage stores line images, +# csv files and splits{train,test,train_unsup} data/download/truth_line_image, +# data/download/truth_csv and data/local/splits respectively. +if [ $stage -le -2 ]; then + echo "$(date): preparing data, obtaining line images and csv files..." + local/yomdle/create_download_dir.sh --language_main $language_main \ + --slam_dir $slam_dir --yomdle_dir $yomdle_dir +fi + +if [ $stage -le -1 ]; then + echo "$(date): getting corpus text for language modelling..." + mkdir -p data/local/text/cleaned + cat $corpus_dir/* > data/local/text/ko.txt + head -20000 data/local/text/ko.txt > data/local/text/cleaned/val.txt + tail -n +20000 data/local/text/ko.txt > data/local/text/cleaned/corpus.txt +fi + +mkdir -p data/{train,test}/data +if [ $stage -le 0 ]; then + echo "$0 stage 0: Processing train and test data.$(date)" + echo " creating text, images.scp, utt2spk and spk2utt" + #local/prepare_data.sh data/download/ + for set in train test; do + local/process_data.py data/download/ \ + data/local/splits/${set}.txt data/${set} + image/fix_data_dir.sh data/${set} + done +fi + +if [ $stage -le 1 ]; then + echo "$(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in train test; do + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; + done + image/fix_data_dir.sh data/train +fi + +if [ $stage -le 3 ]; then + echo "$(date) stage 3: BPE preparation" + # getting non-silence phones. 
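+  # (the inline python3 block below prints every distinct character seen in the
+  # training transcriptions exactly once; the resulting phones.txt is then
+  # concatenated with the training text before learn_bpe.py, so that every
+  # symbol occurs at least once when the BPE units are learned.)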
+ cut -d' ' -f2- data/train/text | \ +python3 <( +cat << "END" +import os, sys, io; +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8'); +output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); +phone_dict = dict(); +for line in infile: + line_vect = line.strip().split(); + for word in line_vect: + for phone in word: + phone_dict[phone] = phone; + +for phone in phone_dict.keys(): + output.write(phone+ '\n'); +END + ) > data/local/text/cleaned/phones.txt + + cut -d' ' -f2- data/train/text > data/local/text/cleaned/train.txt + + echo "learning BPE..." + # it is currently learned with only training text but we can also use all corpus text + # to learn BPE. phones are added so that one isolated occurance of every phone exists. + cat data/local/text/cleaned/phones.txt data/local/text/cleaned/train.txt | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$(date) stage 4: applying BPE..." + echo "applying BPE on train, test text..." + for set in test train; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids + done + + echo "applying BPE to corpus text..." + cat data/local/text/cleaned/corpus.txt | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/local/text/cleaned/bpe_corpus.txt + cat data/local/text/cleaned/val.txt | utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \ + sed 's/@@//g' > data/local/text/cleaned/bpe_val.txt +fi + +if [ $stage -le 5 ]; then + echo "$(date) stage 5: Preparing dictionary and lang..." + local/prepare_dict.sh --dir data/local/dict + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 4 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 6 ]; then + echo "$(date) stage 6: Calling the flat-start chain recipe..." + local/chain/run_e2e_cnn.sh +fi + +if [ $stage -le 7 ]; then + echo "$(date) stage 7: Aligning the training data using the e2e chain model..." + steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +chunk_width='340,300,200,100' +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +if [ $stage -le 8 ]; then + echo "$(date) stage 8: Building a tree and training a regular chain model using the e2e alignments..." + local/chain/run_cnn_e2eali.sh --chunk_width $chunk_width +fi + +if [ $stage -le 9 ]; then + echo "$(date) stage 9: Estimating a language model for decoding..." + local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g +fi + +if [ $stage -le 10 ] && $decode_e2e; then + echo "$(date) stage 10: decoding end2end setup..." 
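+  # (the decode commands below use --acwt 1.0 with --post-decode-acwt 10.0, the
+  # usual convention for chain models, so that scoring can sweep the normal
+  # range of language-model weights.)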
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; + + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" --beam 12 \ + exp/chain/e2e_cnn_1a/graph data/test exp/chain/e2e_cnn_1a/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test exp/chain/e2e_cnn_1a/decode_test{,_rescored} || exit 1 + + echo "Done. Date: $(date). Results:" + local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +fi + +if [ $stage -le 11 ] && $decode_chain; then + echo "$(date) stage 11: decoding chain alignment setup..." + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + exp/chain/cnn_e2eali_1a/ exp/chain/cnn_e2eali_1a/graph || exit 1; + + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 30 --cmd "$cmd" --beam 12 \ + exp/chain/cnn_e2eali_1a/graph data/test exp/chain/cnn_e2eali_1a/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test exp/chain/cnn_e2eali_1a/decode_test{,_rescored} || exit 1 + + echo "Done. Date: $(date). Results:" + local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +fi diff --git a/egs/yomdle_korean/v1/steps b/egs/yomdle_korean/v1/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/yomdle_korean/v1/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/yomdle_korean/v1/utils b/egs/yomdle_korean/v1/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/yomdle_korean/v1/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/yomdle_tamil/README.txt b/egs/yomdle_tamil/README.txt new file mode 100644 index 00000000000..0f295e5ae5f --- /dev/null +++ b/egs/yomdle_tamil/README.txt @@ -0,0 +1,3 @@ +This directory contains example scripts for OCR on the Yomdle and Slam datasets. +Training is done on the Yomdle dataset and testing is done on Slam. +LM rescoring is also done with extra corpus data obtained from various sources. 
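The yomdle recipes above train and decode on BPE sub-word units (stages 3-4 build and apply the encoding), and local/wer_output_filter shown earlier undoes that encoding before WER is computed. The short Python sketch below mirrors the filter's logic; the input line is made up, and it assumes, as the filter implies, that utils/lang/bpe/prepend_words.py marks each original word with a leading '|'.

    #!/usr/bin/env python3
    # Sketch of the BPE-to-plain-text mapping done at scoring time.
    # Mirrors local/wer_output_filter; the example utterance is invented.

    def bpe_to_plain(line):
        words = line.strip().split()
        uttid, pieces = words[0], words[1:]
        # join the sub-word pieces, then turn the '|' word markers into spaces
        return uttid + ' ' + ''.join(pieces).replace('|', ' ')

    print(bpe_to_plain('utt_0001 |he llo |wor ld'))
    # prints 'utt_0001  hello world'; the extra space before "hello" is
    # harmless because scoring splits the transcript on whitespace.

Note that the sed 's/@@//g' in stage 4 already removes the BPE continuation markers before the units enter the lexicon, so the filter only needs to join the pieces and restore the '|' word markers.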
diff --git a/egs/yomdle_tamil/v1/local/yomdle/create_line_image_from_page_image.py b/egs/yomdle_tamil/v1/local/yomdle/create_line_image_from_page_image.py index 8f03be874e7..885f18c7deb 100755 --- a/egs/yomdle_tamil/v1/local/yomdle/create_line_image_from_page_image.py +++ b/egs/yomdle_tamil/v1/local/yomdle/create_line_image_from_page_image.py @@ -29,7 +29,8 @@ from scipy.spatial import ConvexHull from PIL import Image from scipy.misc import toimage - +from pathlib import Path +from glob import glob parser = argparse.ArgumentParser(description="Creates line images from page image") parser.add_argument('image_dir', type=str, help='Path to full page images') parser.add_argument('csv_dir', type=str, help='Path to csv files') @@ -115,7 +116,7 @@ def bounding_area(index, hull): return {'area': len_p * len_o, 'length_parallel': len_p, 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'rectangle_center': (min_p + float(len_p) / 2, min_o + float(len_o) / 2), 'unit_vector': unit_vector_p, } @@ -220,8 +221,8 @@ def get_center(im): ------- (int, int): center of the image """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 + center_x = float(im.size[0]) / 2 + center_y = float(im.size[1]) / 2 return int(center_x), int(center_y) @@ -321,10 +322,18 @@ def update_minimum_bounding_box_input(bounding_box_input): ### main ### globvar = 0 text_fh = open(args.output_file, 'w', encoding='utf-8') -for filename in sorted(os.listdir(args.csv_dir)): - with open(os.path.join(args.csv_dir, filename), 'r', encoding='utf-8') as f: - image_file = os.path.join(args.image_dir, filename.split('.')[0] + args.ext) - im = Image.open(image_file).convert('L') +file_list = list(Path(args.csv_dir).rglob("*.[cC][sS][vV]")) +for filename in sorted(file_list): + filename = str(filename) + with open(str(filename), 'r', encoding='utf-8') as f: + base_name = os.path.basename(filename) + image_file = os.path.join(args.image_dir, base_name.split('.')[0] + args.ext) + try: + im = Image.open(image_file).convert('L') + except Exception as e: + print("Error: No such Image " + row[1]) + globvar += 1 + continue im = pad_image(im) for row in itertools.islice(csv.reader(f), 1, None): points = [] diff --git a/egs/yomdle_tamil/v1/local/yomdle/gedi2csv_enriched.py b/egs/yomdle_tamil/v1/local/yomdle/gedi2csv_enriched.py index 1c9ab618a78..51d7a34e7e8 100755 --- a/egs/yomdle_tamil/v1/local/yomdle/gedi2csv_enriched.py +++ b/egs/yomdle_tamil/v1/local/yomdle/gedi2csv_enriched.py @@ -39,7 +39,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): ''' Initialize the extractor''' def __init__(self, logger, args): self._logger = logger diff --git a/egs/yomdle_tamil/v1/local/yomdle/yomdle2csv.py b/egs/yomdle_tamil/v1/local/yomdle/yomdle2csv.py index 49fc41aa5cc..d75b8bcbe8b 100755 --- a/egs/yomdle_tamil/v1/local/yomdle/yomdle2csv.py +++ b/egs/yomdle_tamil/v1/local/yomdle/yomdle2csv.py @@ -43,7 +43,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): ''' Initialize the extractor''' def __init__(self, logger, args): diff --git a/egs/yomdle_zh/v1/local/create_line_image_from_page_image.py 
b/egs/yomdle_zh/v1/local/create_line_image_from_page_image.py index 77a6791d5d7..7135bb1b242 100755 --- a/egs/yomdle_zh/v1/local/create_line_image_from_page_image.py +++ b/egs/yomdle_zh/v1/local/create_line_image_from_page_image.py @@ -110,7 +110,7 @@ def bounding_area(index, hull): return {'area': len_p * len_o, 'length_parallel': len_p, 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'rectangle_center': (min_p + float(len_p) / 2, min_o + float(len_o) / 2), 'unit_vector': unit_vector_p, } @@ -275,8 +275,8 @@ def get_center(im): ------- (int, int): center of the image """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 + center_x = float(im.size[0]) / 2 + center_y = float(im.size[1]) / 2 return int(center_x), int(center_y) diff --git a/egs/yomdle_zh/v1/local/gedi2csv.py b/egs/yomdle_zh/v1/local/gedi2csv.py index 43a07421dd1..0b80c2e80bb 100755 --- a/egs/yomdle_zh/v1/local/gedi2csv.py +++ b/egs/yomdle_zh/v1/local/gedi2csv.py @@ -55,7 +55,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): """ Initialize the extractor""" def __init__(self, logger, args): diff --git a/egs/yomdle_zh/v1/local/yomdle2csv.py b/egs/yomdle_zh/v1/local/yomdle2csv.py index 3641de90324..8f208e2d968 100755 --- a/egs/yomdle_zh/v1/local/yomdle2csv.py +++ b/egs/yomdle_zh/v1/local/yomdle2csv.py @@ -55,7 +55,7 @@ def npbox2string(npar): # cv2.minAreaRect() returns a Box2D structure which contains following detals - ( center (x,y), (width, height), angle of rotation ) # Get 4 corners of the rectangle using cv2.boxPoints() -class GEDI2CSV(): +class GEDI2CSV(object): """ Initialize the extractor""" def __init__(self, logger, args): diff --git a/misc/maintenance/cpplint.py b/misc/maintenance/cpplint.py index 03d0569ab1c..91658705f41 100755 --- a/misc/maintenance/cpplint.py +++ b/misc/maintenance/cpplint.py @@ -83,6 +83,7 @@ We do a small hack, which is to ignore //'s with "'s after them on the same line, but it is far from perfect (in either direction). """ +from __future__ import division import codecs import getopt @@ -564,7 +565,7 @@ def IncrementErrorCount(self, category): def PrintErrorCounts(self): """Print a summary of errors by category, and the total.""" - for category, count in self.errors_by_category.iteritems(): + for category, count in self.errors_by_category.items(): sys.stderr.write('Category \'%s\' errors found: %d\n' % (category, count)) sys.stderr.write('Total errors found: %d\n' % self.error_count) @@ -656,7 +657,7 @@ def Check(self, error, filename, linenum): trigger = base_trigger * 2**_VerboseLevel() if self.lines_in_function > trigger: - error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + error_level = int(math.log(float(self.lines_in_function) / base_trigger, 2)) # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... if error_level > 5: error_level = 5 @@ -676,7 +677,7 @@ class _IncludeError(Exception): pass -class FileInfo: +class FileInfo(object): """Provides utility functions for filenames. FileInfo provides easy access to the components of a file's path @@ -1012,7 +1013,7 @@ def CheckForCopyright(filename, lines, error): # We'll say it should occur by line 10. Don't forget there's a # dummy line at the front. 
- for line in xrange(1, min(len(lines), 11)): + for line in range(1, min(len(lines), 11)): if re.search(r'Copyright', lines[line], re.I): break else: # means no copyright line was found error(filename, 0, 'legal/copyright', 5, @@ -1604,7 +1605,7 @@ def CheckForFunctionLengths(filename, clean_lines, linenum, if starting_func: body_found = False - for start_linenum in xrange(linenum, clean_lines.NumLines()): + for start_linenum in range(linenum, clean_lines.NumLines()): start_line = lines[start_linenum] joined_line += ' ' + start_line.lstrip() if Search(r'(;|})', start_line): # Declarations and trivial functions @@ -2073,7 +2074,7 @@ def GetLineWidth(line): The width of the line in column positions, accounting for Unicode combining characters and wide characters. """ - if isinstance(line, unicode): + if isinstance(line, str): width = 0 for c in unicodedata.normalize('NFC', line): if unicodedata.east_asian_width(c) in ('W', 'F'): @@ -2861,7 +2862,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, required = {} # A map of header name to linenumber and the template entity. # Example of required: { '': (1219, 'less<>') } - for linenum in xrange(clean_lines.NumLines()): + for linenum in range(clean_lines.NumLines()): line = clean_lines.elided[linenum] if not line or line[0] == '#': continue @@ -2994,7 +2995,7 @@ def ProcessFileData(filename, file_extension, lines, error): RemoveMultiLineComments(filename, lines, error) clean_lines = CleansedLines(lines) - for line in xrange(clean_lines.NumLines()): + for line in range(clean_lines.NumLines()): ProcessLine(filename, file_extension, clean_lines, line, include_state, function_state, class_state, error) class_state.CheckFinished(filename, error) diff --git a/scripts/rnnlm/get_best_model.py b/scripts/rnnlm/get_best_model.py index e8c6bd8a2f4..333ed8dbfc7 100755 --- a/scripts/rnnlm/get_best_model.py +++ b/scripts/rnnlm/get_best_model.py @@ -3,14 +3,14 @@ # Copyright 2017 Johns Hopkins University (author: Daniel Povey) # License: Apache 2.0. -import os import argparse -import sys +import glob import re +import sys parser = argparse.ArgumentParser(description="Works out the best iteration of RNNLM training " - "based on dev-set perplexity, and prints the number corresponding " - "to that iteration", + "based on dev-set perplexity, and prints the number corresponding " + "to that iteration", epilog="E.g. 
" + sys.argv[0] + " exp/rnnlm_a", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -19,8 +19,7 @@ args = parser.parse_args() - -num_iters=None +num_iters = None try: with open(args.rnnlm_dir + "/info.txt", encoding="latin-1") as f: for line in f: @@ -36,15 +35,15 @@ sys.exit(sys.argv[0] + ": could not get num_iters from {0}/info.txt".format( args.rnnlm_dir)) -best_objf=-2000 -best_iter=-1 +best_objf = -2000 +best_iter = -1 for i in range(1, num_iters): this_logfile = "{0}/log/compute_prob.{1}.log".format(args.rnnlm_dir, i) try: f = open(this_logfile, 'r', encoding='latin-1') except: sys.exit(sys.argv[0] + ": could not open log-file {0}".format(this_logfile)) - this_objf=-1000 + this_objf = -1000 for line in f: m = re.search('Overall objf .* (\S+)$', str(line)) if m is not None: @@ -53,6 +52,10 @@ except Exception as e: sys.exit(sys.argv[0] + ": line in file {0} could not be parsed: {1}, error is: {2}".format( this_logfile, line, str(e))) + # verify this iteration still has model files present + if len(glob.glob("{0}/{1}.raw".format(args.rnnlm_dir, i))) == 0: + # this iteration has log files, but model files have been cleaned up, skip it + continue if this_objf == -1000: print(sys.argv[0] + ": warning: could not parse objective function from {0}".format( this_logfile), file=sys.stderr) @@ -63,5 +66,4 @@ if best_iter == -1: sys.exit(sys.argv[0] + ": error: could not get best iteration.") - print(str(best_iter)) diff --git a/scripts/rnnlm/get_embedding_dim.py b/scripts/rnnlm/get_embedding_dim.py index a5ddb8c25f3..63eaf307498 100755 --- a/scripts/rnnlm/get_embedding_dim.py +++ b/scripts/rnnlm/get_embedding_dim.py @@ -101,4 +101,4 @@ "nnet '{0}': {1} != {2}".format( args.nnet, input_dim, output_dim)) -print(str(input_dim)) +print('{}'.format(input_dim)) diff --git a/scripts/rnnlm/rnnlm_cleanup.py b/scripts/rnnlm/rnnlm_cleanup.py new file mode 100644 index 00000000000..40cbee7a496 --- /dev/null +++ b/scripts/rnnlm/rnnlm_cleanup.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Tilde +# License: Apache 2.0 + +import sys + +import argparse +import os +import re +import glob + +script_name = sys.argv[0] + +parser = argparse.ArgumentParser(description="Removes models from past training iterations of " + "RNNLM. Can use either 'keep_latest' (default) or " + "'keep_best' cleanup strategy, where former keeps " + "the models that are freshest, while latter keeps " + "the models with best training objective score on " + "dev set.", + epilog="E.g. 
" + script_name + " exp/rnnlm_a --keep_best", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument("rnnlm_dir", + help="Directory where the RNNLM has been trained") +parser.add_argument("--iters_to_keep", + help="Max number of iterations to keep", + type=int, + default=3) +parser.add_argument("--keep_latest", + help="Keeps the training iterations that are latest by age", + action="store_const", + const=True, + default=False) +parser.add_argument("--keep_best", + help="Keeps the training iterations that have the best objf", + action="store_const", + const=True, + default=False) + +args = parser.parse_args() + +# validate arguments +if args.keep_latest and args.keep_best: + sys.exit(script_name + ": can only use one of 'keep_latest' or 'keep_best', but not both") +elif not args.keep_latest and not args.keep_best: + sys.exit(script_name + ": no cleanup strategy specified: use 'keep_latest' or 'keep_best'") + + +class IterationInfo: + def __init__(self, model_files, objf, compute_prob_done): + self.model_files = model_files + self.objf = objf + self.compute_prob_done = compute_prob_done + + def __str__(self): + return "{model_files: %s, compute_prob: %s, objf: %2.3f}" % (self.model_files, + self.compute_prob_done, + self.objf) + + def __repr__(self): + return self.__str__() + + +def get_compute_prob_info(log_file): + # we want to know 3 things: iteration number, objf and whether compute prob is done + iteration = int(log_file.split(".")[-2]) + objf = -2000 + compute_prob_done = False + # roughly based on code in get_best_model.py + try: + f = open(log_file, "r", encoding="latin-1") + except: + print(script_name + ": warning: compute_prob log not found for iteration " + + str(iter) + ". Skipping", + file=sys.stderr) + return iteration, objf, compute_prob_done + for line in f: + objf_m = re.search('Overall objf .* (\S+)$', str(line)) + if objf_m is not None: + try: + objf = float(objf_m.group(1)) + except Exception as e: + sys.exit(script_name + ": line in file {0} could not be parsed: {1}, error is: {2}".format( + log_file, line, str(e))) + if "# Ended" in line: + compute_prob_done = True + if objf == -2000: + print(script_name + ": warning: could not parse objective function from " + log_file, file=sys.stderr) + return iteration, objf, compute_prob_done + + +def get_iteration_files(exp_dir): + iterations = dict() + compute_prob_logs = glob.glob(exp_dir + "/log/compute_prob.[0-9]*.log") + for log in compute_prob_logs: + iteration, objf, compute_prob_done = get_compute_prob_info(log) + if iteration == 0: + # iteration 0 is special, never consider it for cleanup + continue + if compute_prob_done: + # this iteration can be safely considered for cleanup + # gather all model files belonging to it + model_files = [] + # when there are multiple jobs per iteration, there can be several model files + # we need to potentially clean them all up without mixing them up + model_files.extend(glob.glob("{0}/word_embedding.{1}.mat".format(exp_dir, iteration))) + model_files.extend(glob.glob("{0}/word_embedding.{1}.[0-9]*.mat".format(exp_dir, iteration))) + model_files.extend(glob.glob("{0}/feat_embedding.{1}.mat".format(exp_dir, iteration))) + model_files.extend(glob.glob("{0}/feat_embedding.{1}.[0-9]*.mat".format(exp_dir, iteration))) + model_files.extend(glob.glob("{0}/{1}.raw".format(exp_dir, iteration))) + model_files.extend(glob.glob("{0}/{1}.[0-9]*.raw".format(exp_dir, iteration))) + # compute_prob logs outlive model files, only consider iterations that do still have model 
files + if len(model_files) > 0: + iterations[iteration] = IterationInfo(model_files, objf, compute_prob_done) + return iterations + + +def remove_model_files_for_iter(iter_info): + for f in iter_info.model_files: + os.remove(f) + + +def keep_latest(iteration_dict): + max_to_keep = args.iters_to_keep + kept = 0 + iterations_in_reverse_order = reversed(sorted(iteration_dict)) + for iter in iterations_in_reverse_order: + if kept < max_to_keep: + kept += 1 + else: + remove_model_files_for_iter(iteration_dict[iter]) + + +def keep_best(iteration_dict): + iters_to_keep = args.iters_to_keep + best = [] + for iter, iter_info in iteration_dict.items(): + objf = iter_info.objf + if objf == -2000: + print(script_name + ": warning: objf unavailable for iter " + str(iter), file=sys.stderr) + continue + # add potential best, sort by objf, trim to iters_to_keep size + best.append((iter, objf)) + best = sorted(best, key=lambda x: -x[1]) + if len(best) > iters_to_keep: + throwaway = best[iters_to_keep:] + best = best[:iters_to_keep] + # remove iters that we know are not the best + for (iter, _) in throwaway: + remove_model_files_for_iter(iteration_dict[iter]) + + +# grab all the iterations mapped to their model files, objf score and compute_prob status +iterations = get_iteration_files(args.rnnlm_dir) +# apply chosen cleanup strategy +if args.keep_latest: + keep_latest(iterations) +else: + keep_best(iterations) diff --git a/scripts/rnnlm/train_rnnlm.sh b/scripts/rnnlm/train_rnnlm.sh index aedfc470ac9..d6d38f3d734 100755 --- a/scripts/rnnlm/train_rnnlm.sh +++ b/scripts/rnnlm/train_rnnlm.sh @@ -38,6 +38,11 @@ num_egs_threads=10 # number of threads used for sampling, if we're using use_gpu=true # use GPU for training use_gpu_for_diagnostics=false # set true to use GPU for compute_prob_*.log +# optional cleanup options +cleanup=false # add option --cleanup true to enable automatic cleanup of old models +cleanup_strategy="keep_latest" # determines cleanup strategy, use either "keep_latest" or "keep_best" +cleanup_keep_iters=3 # number of iterations that will have their models retained + trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM . utils/parse_options.sh @@ -222,12 +227,16 @@ while [ $x -lt $num_iters ]; do nnet3-average $src_models $dir/$[x+1].raw '&&' \ matrix-sum --average=true $src_matrices $dir/${embedding_type}_embedding.$[x+1].mat fi + # optionally, perform cleanup after training + if [ "$cleanup" = true ] ; then + python3 rnnlm/rnnlm_cleanup.py $dir --$cleanup_strategy --iters_to_keep $cleanup_keep_iters + fi ) - # the error message below is not that informative, but $cmd will # have printed a more specific one. [ -f $dir/.error ] && echo "$0: error with diagnostics on iteration $x of training" && exit 1; fi + x=$[x+1] num_splits_processed=$[num_splits_processed+this_num_jobs] done diff --git a/src/Makefile b/src/Makefile index 6dfd146e3d5..1b37ebce745 100644 --- a/src/Makefile +++ b/src/Makefile @@ -36,6 +36,7 @@ all: $(MAKE) kaldi.mk $(MAKE) mklibdir $(MAKE) subdirs + $(MAKE) -C matrix test -echo Done subdirs: $(SUBDIRS) diff --git a/src/base/io-funcs.h b/src/base/io-funcs.h index ca476033950..6c2b690f54c 100644 --- a/src/base/io-funcs.h +++ b/src/base/io-funcs.h @@ -31,7 +31,9 @@ #include #include #include + #include "base/kaldi-common.h" +#include "base/io-funcs-inl.h" namespace kaldi { @@ -235,7 +237,4 @@ inline void InitKaldiOutputStream(std::ostream &os, bool binary); inline bool InitKaldiInputStream(std::istream &is, bool *binary); } // end namespace kaldi. 
- -#include "base/io-funcs-inl.h" - #endif // KALDI_BASE_IO_FUNCS_H_ diff --git a/src/bin/draw-tree.cc b/src/bin/draw-tree.cc index ad1dd41a53f..c9be5586933 100644 --- a/src/bin/draw-tree.cc +++ b/src/bin/draw-tree.cc @@ -18,6 +18,7 @@ // limitations under the License. #include "tree/tree-renderer.h" +#include "tree/context-dep.h" void MakeEvent(std::string &qry, fst::SymbolTable *phone_syms, kaldi::EventType **query) diff --git a/src/chainbin/chain-get-supervision.cc b/src/chainbin/chain-get-supervision.cc index 6090d9f0058..1ac89d4630b 100644 --- a/src/chainbin/chain-get-supervision.cc +++ b/src/chainbin/chain-get-supervision.cc @@ -22,6 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "chain/chain-supervision.h" +#include "tree/context-dep.h" namespace kaldi { namespace chain { diff --git a/src/chainbin/nnet3-chain-combine.cc b/src/chainbin/nnet3-chain-combine.cc index a3222d2285f..b534316bf7f 100644 --- a/src/chainbin/nnet3-chain-combine.cc +++ b/src/chainbin/nnet3-chain-combine.cc @@ -72,7 +72,7 @@ double ComputeObjf(bool batchnorm_test_mode, bool dropout_test_mode, void UpdateNnetMovingAverage(int32 num_models, const Nnet &nnet, Nnet *moving_average_nnet) { KALDI_ASSERT(NumParameters(nnet) == NumParameters(*moving_average_nnet)); - ScaleNnet((num_models - 1.0) / num_models, moving_average_nnet); + ScaleNnetForAverage((num_models - 1.0) / num_models, moving_average_nnet); AddNnet(nnet, 1.0 / num_models, moving_average_nnet); } @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) { po.Register("use-gpu", &use_gpu, "yes|no|optional|wait, only has effect if compiled with CUDA"); po.Register("batchnorm-test-mode", &batchnorm_test_mode, - "If true, set test-mode to true on any BatchNormComponents " + "If true, set test-mode to true on any BatchNormComponents and BatchRenormComponents" "while evaluating objectives."); po.Register("dropout-test-mode", &dropout_test_mode, "If true, set test-mode to true on any DropoutComponents and " diff --git a/src/configure b/src/configure index c4a1445efbd..b94731da918 100755 --- a/src/configure +++ b/src/configure @@ -558,66 +558,23 @@ function linux_check_static { fi } -function linux_configure_debian_ubuntu { - m=$1 - ATLASLIBS="/usr/lib$m/atlas-base/libatlas.so.3gf /usr/lib$m/atlas-base/libf77blas.so.3gf /usr/lib$m/atlas-base/libcblas.so.3gf /usr/lib$m/atlas-base/liblapack_atlas.so.3gf" - for f in $ATLASLIBS; do - [ ! -f $f ] && return 1; - done - lapacklib=$(echo $ATLASLIBS | awk '{print $NF}') - if ! nm --dynamic $lapacklib | grep ATL_cgetrf >/dev/null; then - exit 1; - fi - echo ATLASINC = $ATLASROOT/include >> kaldi.mk - echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - echo >> kaldi.mk - if [[ "$TARGET_ARCH" == arm* ]]; then - cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "$TARGET_ARCH" == ppc64le ]]; then - cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk - else - cat makefiles/linux_atlas.mk >> kaldi.mk - fi - echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" - $use_cuda && configure_cuda - linux_configure_speex -} - -function linux_configure_debian_ubuntu3 { - ATLASLIBS="/usr/lib/libatlas.so.3 /usr/lib/libf77blas.so.3 /usr/lib/libcblas.so.3 /usr/lib/liblapack_atlas.so.3" - for f in $ATLASLIBS; do - [ ! -f $f ] && return 1; - done - lapacklib=$(echo $ATLASLIBS | awk '{print $NF}') - if ! 
nm --dynamic $lapacklib | grep ATL_cgetrf >/dev/null; then - exit 1; - fi - echo ATLASINC = $ATLASROOT/include >> kaldi.mk - echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - echo >> kaldi.mk - if [[ "$TARGET_ARCH" == arm* ]]; then - cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "$TARGET_ARCH" == ppc64le ]]; then - cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk - else - cat makefiles/linux_atlas.mk >> kaldi.mk - fi - echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" - $use_cuda && configure_cuda - linux_configure_speex -} - -function linux_configure_debian7 { - ATLASLIBS="/usr/lib/atlas-base/libatlas.so.3.0 /usr/lib/atlas-base/libf77blas.so.3.0 /usr/lib/atlas-base/libcblas.so.3 /usr/lib/atlas-base/liblapack_atlas.so.3" +function linux_configure_atlas_generic { + # You pass in a directory (e.g. /usr/lib/atlas-base) and a suffix (e.g. so.3.0) + # and it tries to find ATLAS libraries with that dir and suffix. On success it + # returns 0; on failure, it returns 1. + dir=$1 + suffix=$2 + ATLASLIBS="$dir/libatlas.$suffix $dir/libf77blas.$suffix $dir/libcblas.$suffix $dir/liblapack_atlas.$suffix" for f in $ATLASLIBS; do [ ! -f $f ] && return 1; done lapacklib=$(echo $ATLASLIBS | awk '{print $NF}') if ! nm --dynamic $lapacklib | grep ATL_cgetrf >/dev/null; then + echo "configure: failed to find symbol ATL_cgetrf in library $lapacklib" exit 1; fi libdir=$(dirname $(echo $ATLASLIBS | awk '{print $1}')) - [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_debian7" && exit 1; + [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_atlas_generic: dir=$dir,suffix=$suffix" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo >> kaldi.mk @@ -628,33 +585,11 @@ function linux_configure_debian7 { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - echo "Successfully configured for Debian 7 [dynamic libraries] with ATLASLIBS =$ATLASLIBS" + echo "Successfully configured ATLAS with ATLASLIBS=$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex } -function linux_configure_redhat { - m=$1 # 64 or empty. - ATLASLIBS="/usr/lib$m/atlas/libatlas.so.3 /usr/lib$m/atlas/libf77blas.so.3 /usr/lib$m/atlas/libcblas.so.3 /usr/lib$m/atlas/libclapack.so.3" - for f in $ATLASLIBS; do - [ ! -f $f ] && return 1; - done - libdir=$(dirname $(echo $ATLASLIBS | awk '{print $1}')) - [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_redhat" && exit 1; - echo ATLASINC = $ATLASROOT/include >> kaldi.mk - echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo >> kaldi.mk - if [[ "$TARGET_ARCH" == arm* ]]; then - cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "$TARGET_ARCH" == ppc64le ]]; then - cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk - else - cat makefiles/linux_atlas.mk >> kaldi.mk - fi - echo "Successfully configured for red hat [dynamic libraries] with ATLASLIBS =$ATLASLIBS" - $use_cuda && configure_cuda -} - function linux_configure_redhat_fat { # This is for when only two so-called 'fat' ATLAS libs are provided: # libsatlas.so.3 and libtatlas.so.3. @@ -680,7 +615,7 @@ function linux_configure_redhat_fat { $use_cuda && configure_cuda } -function linux_configure_static { +function linux_configure_atlas_static { if $threaded_atlas; then pt=pt; else pt=""; fi if [ -z $ATLASLIBDIR ]; then # Note: it'll pick up the last one below. 
@@ -699,11 +634,11 @@ function linux_configure_static { echo "Validating presence of ATLAS libs in $ATLASLIBDIR" ATLASLIBS= # The Lapack part of ATLAS seems to appear under various different names.. but it - # should always have symbols like ATL_cgetrf defined, so we test for this, - # for all the names we have encountered. + # should always have symbols like ATL_cgetrf and clapack_cgetrf defined, so we test for this. for libname in liblapack liblapack_atlas libclapack; do if [ -f $ATLASLIBDIR/${libname}.a -a "$ATLASLIBS" == "" ]; then - if nm $ATLASLIBDIR/${libname}.a | grep ATL_cgetrf >/dev/null; then + if nm $ATLASLIBDIR/${libname}.a | grep ATL_cgetrf >/dev/null && \ + nm $ATLASLIBDIR/${libname}.a | grep clapack_cgetrf >/dev/null; then ATLASLIBS=$ATLASLIBDIR/${libname}.a echo "Using library $ATLASLIBS as ATLAS's CLAPACK library." fi @@ -782,11 +717,11 @@ function linux_configure_dynamic { echo "Validating presence of ATLAS libs in $ATLASLIBDIR" ATLASLIBS= # The Lapack part of ATLAS seems to appear under various different names.. but it - # should always have symbols like ATL_cgetrf defined, so we test for this, - # for all the names we have encountered. + # should always have symbols like clapack_cgetrf and ATL_cgetrf defined, so we test for this. for libname in lapack lapack_atlas clapack; do if [ -f $ATLASLIBDIR/lib${libname}.so -a "$ATLASLIBS" == "" ]; then - if nm --dynamic $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then + if nm --dynamic $ATLASLIBDIR/lib${libname}.so | grep clapack_cgetrf >/dev/null && \ + nm --dynamic $ATLASLIBDIR/lib${libname}.so | grep ATL_cgetrf >/dev/null; then ATLASLIBS="$ATLASLIBDIR/lib${libname}.so" echo "Using library $ATLASLIBS as ATLAS's CLAPACK library." fi @@ -1229,33 +1164,18 @@ elif [ "`uname`" == "Linux" ]; then # containing {liblapack.a,libblas.a}, and linking against just these two # libraries worked. - if $static_math; then - # Prefer static to dynamic math. - linux_configure_static || \ - linux_configure_debian_ubuntu3 || \ - linux_configure_dynamic || \ - linux_configure_debian_ubuntu 64 || \ - linux_configure_debian_ubuntu || \ - linux_configure_debian7 || \ - linux_configure_redhat 64 || \ - linux_configure_redhat || \ - linux_configure_redhat_fat 64 || \ - linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS libraries"; - else - # Prefer dynamic to static math. 
- linux_configure_debian_ubuntu3 || \ - linux_configure_dynamic || \ - linux_configure_static || \ - linux_configure_debian_ubuntu 64 || \ - linux_configure_debian_ubuntu || \ - linux_configure_debian7 || \ - linux_configure_redhat 64 || \ - linux_configure_redhat || \ - linux_configure_redhat_fat 64 || \ - linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS libraries"; - fi + ( $static_math && linux_configure_atlas_static ) || \ + linux_configure_atlas_generic /usr/lib "so.3" || \ + linux_configure_atlas_generic /usr/lib/atlas-base "so.3gf" || \ + linux_configure_atlas_generic /usr/lib64/atlas-base "so.3gf" \ + linux_configure_atlas_generic /usr/lib/atlas "so.3" || \ + linux_configure_atlas_generic /usr/lib64/atlas "so.3" || \ + linux_configure_atlas_generic /usr/lib/x86_64-linux-gnu/ "so.3" || \ + linux_configure_atlas_generic /usr/lib/x86_64-linux-gnu/ "so" || \ + linux_configure_redhat_fat 64 || \ + linux_configure_redhat_fat || \ + linux_configure_atlas_static || \ + linux_atlas_failure "Failed to configure ATLAS libraries"; elif [ "$MATHLIB" == "MKL" ]; then if [ "$TARGET_ARCH" != "x86_64" ]; then diff --git a/src/decoder/decodable-matrix.h b/src/decoder/decodable-matrix.h index f32a007e6ca..475638a35af 100644 --- a/src/decoder/decodable-matrix.h +++ b/src/decoder/decodable-matrix.h @@ -26,6 +26,7 @@ #include "base/kaldi-common.h" #include "hmm/transition-model.h" #include "itf/decodable-itf.h" +#include "matrix/kaldi-matrix.h" namespace kaldi { @@ -241,8 +242,6 @@ class DecodableMatrixScaled: public DecodableInterface { BaseFloat scale_; KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixScaled); }; - - } // namespace kaldi #endif // KALDI_DECODER_DECODABLE_MATRIX_H_ diff --git a/src/decoder/lattice-faster-online-decoder.h b/src/decoder/lattice-faster-online-decoder.h index e56f24a2474..69bf8b6d98d 100644 --- a/src/decoder/lattice-faster-online-decoder.h +++ b/src/decoder/lattice-faster-online-decoder.h @@ -90,7 +90,7 @@ class LatticeFasterOnlineDecoderTpl: /// Outputs an FST corresponding to the single best path through the lattice. /// This is quite efficient because it doesn't get the entire raw lattice and find - /// the best path through it; insterad, it uses the BestPathEnd and BestPathIterator + /// the best path through it; instead, it uses the BestPathEnd and BestPathIterator /// so it basically traces it back through the lattice. /// Returns true if result is nonempty (using the return status is deprecated, /// it will become void). If "use_final_probs" is true AND we reached the diff --git a/src/decoder/training-graph-compiler.h b/src/decoder/training-graph-compiler.h index 77c5735687f..ee56c6dfb3d 100644 --- a/src/decoder/training-graph-compiler.h +++ b/src/decoder/training-graph-compiler.h @@ -24,6 +24,7 @@ #include "hmm/transition-model.h" #include "fst/fstlib.h" #include "fstext/fstext-lib.h" +#include "tree/context-dep.h" namespace kaldi { diff --git a/src/doc/grammar.dox b/src/doc/grammar.dox index 80000c0b067..d1c6f51f349 100644 --- a/src/doc/grammar.dox +++ b/src/doc/grammar.dox @@ -336,7 +336,7 @@ Z_S 243 that consume CLG.fst always also consume the ilabel_info, which is a vector >. For a particular ilabel, say 1536, ilabel_info[1536] = { 5, 21 } is a vector of integers representing a phone-in-context. E.g. this would represent the phone 21 with a left-context of 5. 
- Disambiguation symbols also appear on the input of CLG.fst, and they are are represented in the ilabel_info + Disambiguation symbols also appear on the input of CLG.fst, and they are represented in the ilabel_info a 1-dimensional vector like { -104 } containing the negative of the disambiguation symbol's integer id. @@ -352,7 +352,7 @@ Z_S 243 The special symbols in CLG.fst will be as follows. The following special symbols may appear in any CLG graph, top-level or not: - - When any graph invokes a sub-graph, there will ben arc with an ilabel + - When any graph invokes a sub-graph, there will be n arc with an ilabel (\#nonterm:foo, left-context-phone) representing the user-specified nonterminal and the actual left-context, which will be followed by arcs with ilabels of the form (\#nonterm_reenter, diff --git a/src/gmmbin/gmm-init-biphone.cc b/src/gmmbin/gmm-init-biphone.cc index e5cc182f94c..42a9d1a91a0 100644 --- a/src/gmmbin/gmm-init-biphone.cc +++ b/src/gmmbin/gmm-init-biphone.cc @@ -22,6 +22,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/event-map.h" +#include "tree/context-dep.h" #include "hmm/hmm-topology.h" #include "hmm/transition-model.h" diff --git a/src/gmmbin/gmm-init-mono.cc b/src/gmmbin/gmm-init-mono.cc index 0aac769eb70..3c370c36515 100644 --- a/src/gmmbin/gmm-init-mono.cc +++ b/src/gmmbin/gmm-init-mono.cc @@ -23,6 +23,7 @@ #include "gmm/am-diag-gmm.h" #include "hmm/hmm-topology.h" #include "hmm/transition-model.h" +#include "tree/context-dep.h" namespace kaldi { // This function reads a file like: diff --git a/src/hmm/hmm-test-utils.h b/src/hmm/hmm-test-utils.h index 495ebf278ae..4faaa92fa66 100644 --- a/src/hmm/hmm-test-utils.h +++ b/src/hmm/hmm-test-utils.h @@ -24,6 +24,7 @@ #include "hmm/hmm-topology.h" #include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" +#include "tree/context-dep.h" namespace kaldi { diff --git a/src/hmm/hmm-topology.h b/src/hmm/hmm-topology.h index edea02998c0..750d35bcfe4 100644 --- a/src/hmm/hmm-topology.h +++ b/src/hmm/hmm-topology.h @@ -21,7 +21,6 @@ #define KALDI_HMM_HMM_TOPOLOGY_H_ #include "base/kaldi-common.h" -#include "tree/context-dep.h" #include "util/const-integer-set.h" diff --git a/src/hmm/posterior.h b/src/hmm/posterior.h index 0c255845dd5..e153c249740 100644 --- a/src/hmm/posterior.h +++ b/src/hmm/posterior.h @@ -24,7 +24,6 @@ #define KALDI_HMM_POSTERIOR_H_ #include "base/kaldi-common.h" -#include "tree/context-dep.h" #include "util/const-integer-set.h" #include "util/kaldi-table.h" #include "hmm/transition-model.h" diff --git a/src/hmm/transition-model.h b/src/hmm/transition-model.h index f03b54e8b71..e453c24f9cb 100644 --- a/src/hmm/transition-model.h +++ b/src/hmm/transition-model.h @@ -22,11 +22,12 @@ #define KALDI_HMM_TRANSITION_MODEL_H_ #include "base/kaldi-common.h" -#include "tree/context-dep.h" #include "util/const-integer-set.h" #include "fst/fst-decl.h" // forward declarations. 
#include "hmm/hmm-topology.h" #include "itf/options-itf.h" +#include "itf/context-dep-itf.h" +#include "matrix/kaldi-vector.h" namespace kaldi { diff --git a/src/lat/minimize-lattice.h b/src/lat/minimize-lattice.h index fcf6c0f36df..eb13fc1c851 100644 --- a/src/lat/minimize-lattice.h +++ b/src/lat/minimize-lattice.h @@ -28,7 +28,6 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "fstext/fstext-lib.h" -#include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" namespace fst { diff --git a/src/lat/push-lattice.h b/src/lat/push-lattice.h index e782aadc0f3..080bb637604 100644 --- a/src/lat/push-lattice.h +++ b/src/lat/push-lattice.h @@ -28,7 +28,6 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "fstext/fstext-lib.h" -#include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" namespace fst { diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk index ee0f3c2e90b..7af6497abec 100644 --- a/src/makefiles/default_rules.mk +++ b/src/makefiles/default_rules.mk @@ -27,10 +27,16 @@ endif all: $(LIBFILE) $(BINFILES) -$(LIBFILE): $(OBJFILES) + +ifdef LIBNAME + +$(LIBNAME).a: $(OBJFILES) $(AR) -cr $(LIBNAME).a $(OBJFILES) $(RANLIB) $(LIBNAME).a + ifeq ($(KALDI_FLAVOR), dynamic) +# the LIBFILE is not the same as $(LIBNAME).a +$(LIBFILE): $(LIBNAME).a ifeq ($(shell uname), Darwin) $(CXX) -dynamiclib -o $@ -install_name @rpath/$@ $(LDFLAGS) $(OBJFILES) $(LDLIBS) ln -sf $(shell pwd)/$@ $(KALDILIBDIR)/$@ @@ -41,7 +47,8 @@ ifeq ($(KALDI_FLAVOR), dynamic) else # Platform not supported $(error Dynamic libraries not supported on this platform. Run configure with --static flag.) endif -endif +endif # ifeq ($(KALDI_FLAVOR), dynamic) +endif # ifdef LIBNAME # By default (GNU) make uses the C compiler $(CC) for linking object files even # if they were compiled from a C++ source. Below redefinition forces make to diff --git a/src/matrix/kaldi-blas.h b/src/matrix/kaldi-blas.h index 5d25ab852bd..8a06540bba2 100644 --- a/src/matrix/kaldi-blas.h +++ b/src/matrix/kaldi-blas.h @@ -50,8 +50,8 @@ #ifdef HAVE_ATLAS extern "C" { - #include - #include + #include "cblas.h" + #include "clapack.h" } #elif defined(HAVE_CLAPACK) #ifdef __APPLE__ @@ -74,7 +74,7 @@ // from the tools/CLAPACK_include directory. #include #include - #include + #include // get rid of macros from f2c.h -- these are dangerous. 
#undef abs @@ -110,7 +110,7 @@ #undef bit_clear #undef bit_set #else - #error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)" + #error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL (but not more than one)" #endif #ifdef HAVE_OPENBLAS diff --git a/src/nnet/nnet-trnopts.h b/src/nnet/nnet-trnopts.h index 12ad1b1cbb5..0a064e17fd4 100644 --- a/src/nnet/nnet-trnopts.h +++ b/src/nnet/nnet-trnopts.h @@ -52,7 +52,7 @@ struct NnetTrainOptions { // print for debug purposes friend std::ostream& operator<<(std::ostream& os, const NnetTrainOptions& opts) { - os << "RbmTrainOptions : " + os << "NnetTrainOptions : " << "learn_rate" << opts.learn_rate << ", " << "momentum" << opts.momentum << ", " << "l2_penalty" << opts.l2_penalty << ", " diff --git a/src/nnet3/nnet-batch-compute.cc b/src/nnet3/nnet-batch-compute.cc index 6db046796be..5da55d0f70d 100644 --- a/src/nnet3/nnet-batch-compute.cc +++ b/src/nnet3/nnet-batch-compute.cc @@ -135,7 +135,7 @@ NnetBatchComputer::GetHighestPriorityComputation( int32 *minibatch_size_out, std::vector *tasks) { tasks->clear(); - std::unique_lock(mutex_); + std::unique_lock lock(mutex_); MapType::iterator iter = tasks_.begin(), end = tasks_.end(), best_iter = tasks_.end(); double highest_priority = -std::numeric_limits::infinity(); @@ -1094,7 +1094,7 @@ bool NnetBatchDecoder::GetOutput( return false; UtteranceOutput *this_output = pending_utts_.front(); pending_utts_.pop_front(); - if (this_output->compact_lat.NumStates() == 0) { + if (this_output->lat.NumStates() == 0) { delete this_output; // ... and continue round the loop, without returning any output to the // user for this utterance. Something went wrong in decoding: for diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc index 1ff7daa01d1..c66dc347ce9 100644 --- a/src/nnet3/nnet-component-itf.cc +++ b/src/nnet3/nnet-component-itf.cc @@ -173,6 +173,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) { ans = new LstmNonlinearityComponent(); } else if (component_type == "BatchNormComponent") { ans = new BatchNormComponent(); + } else if (component_type == "BatchRenormComponent") { + ans = new BatchRenormComponent(); } else if (component_type == "TimeHeightConvolutionComponent") { ans = new TimeHeightConvolutionComponent(); } else if (component_type == "RestrictedAttentionComponent") { diff --git a/src/nnet3/nnet-compute.h b/src/nnet3/nnet-compute.h index 333ed3168b9..f96195ff146 100644 --- a/src/nnet3/nnet-compute.h +++ b/src/nnet3/nnet-compute.h @@ -119,7 +119,7 @@ class NnetComputer { // Version of GetOutput that calls Swap(), destroying the output stored inside // this object. You should probably not use this if you plan to call - // Backward() on the same NnetComputer object, or it's a recurret + // Backward() on the same NnetComputer object, or it's a recurrent // computation-- it may lead to a crash. 
void GetOutputDestructive(const std::string &output_name, CuMatrix *output); diff --git a/src/nnet3/nnet-normalize-component.cc b/src/nnet3/nnet-normalize-component.cc index d10c6fabd36..0384faf2293 100644 --- a/src/nnet3/nnet-normalize-component.cc +++ b/src/nnet3/nnet-normalize-component.cc @@ -3,6 +3,7 @@ // Copyright 2015-2017 Johns Hopkins University (author: Daniel Povey) // 2015 Guoguo Chen // 2015 Daniel Galvez +// 2018 Gaofeng Cheng (Institute of Acoustics, Chinese Academy of Sciences) // See ../../COPYING for clarification regarding multiple authors // @@ -341,7 +342,7 @@ void BatchNormComponent::InitFromConfig(ConfigLine *cfl) { y(i) = x(i) - mean var = 1/I \sum_i y(i)^2 - rscale = sqrt(var + epsilon)^power <---- For regular batchnorm, power == -0.5. + rscale = (var + epsilon)^power <---- For regular batchnorm, power == -0.5. z(i) = target-rms * rscale * y(i) @@ -378,7 +379,7 @@ void BatchNormComponent::InitFromConfig(ConfigLine *cfl) { mean' = \sum_i y'(i) = (target-rms * rscale * \sum_i z'(i)) + (var_deriv_mod \sum_i z(i)) [... and the 2nd term above is zero when summed over i, because \sum_i z(i) is zero, ...] - = target-rms * rscale * \sum_i z(i) + = target-rms * rscale * \sum_i z'(i) and: x'(i) = z'(i) * target-rms * rscale + z(i) var_deriv_mod - 1/I mean' = z'(i) * target-rms * rscale + z(i) var_deriv_mod - 1/I * target-rms * rscale * \sum_i z'(i) @@ -675,6 +676,645 @@ void BatchNormComponent::ZeroStats() { } } +void BatchRenormComponent::ComputeDerived() { + if (!test_mode_) { + offset_.Resize(0); + scale_.Resize(0); + return; + } + bool compute_prob_zero_iter = false; + + if (count_ == 0.0) { + KALDI_WARN << "Test-mode is set but there is no data count. " + "Creating random counts. This only makes sense " + "in unit-tests (or compute_prob_*.0.log). If you see this " + "elsewhere, something is very wrong."; + count_ = 1.0; + stats_sum_.SetRandn(); + stats_sumsq_.SetRandn(); + stats_sumsq_.AddVecVec(1.0, stats_sum_, stats_sum_, 1.0); + compute_prob_zero_iter = true; + } + offset_.Resize(block_dim_); + scale_.Resize(block_dim_); + if (compute_prob_zero_iter) { + offset_.CopyFromVec(stats_sum_); + offset_.Scale(-1.0 / count_); + // now offset_ is -mean. + scale_.CopyFromVec(stats_sumsq_); + scale_.Scale(1.0 / count_); + scale_.AddVecVec(-1.0, offset_, offset_, 1.0); + // now scale_ is variance. + // Mathematically the ApplyFloor statement should be a no-op; this is in case + // of numerical roundoff. + scale_.ApplyFloor(0.0); + scale_.Add(epsilon_); + BaseFloat power = -0.5; + scale_.ApplyPow(power); + // now scale_ = min(variance, epsilon)^power + // next, multiply by the target RMS (normally 1.0). + scale_.Scale(target_rms_); + offset_.MulElements(scale_); + // now offset_ is -(scale*mean). 
+ } else { + offset_.CopyFromVec(moving_mean_); + scale_.CopyFromVec(moving_stddev_); + scale_.ApplyPow(-1.0); + offset_.MulElements(scale_); + offset_.Scale(-1.0); + } +} + +void BatchRenormComponent::SetTestMode(bool test_mode) { + test_mode_ = test_mode; + ComputeDerived(); +} + +// for batch-renorm, target-rms should be 1.0 +void BatchRenormComponent::Check() const { + KALDI_ASSERT(dim_ > 0 && block_dim_ > 0 && dim_ % block_dim_ == 0 && + epsilon_ > 0.0 && target_rms_ == 1.0 && r_max_ > 0 && d_max_ >= 0 && alpha_ >= 0.0); +} + +BatchRenormComponent::BatchRenormComponent(const BatchRenormComponent &other): + dim_(other.dim_), block_dim_(other.block_dim_), + epsilon_(other.epsilon_), target_rms_(other.target_rms_), + test_mode_(other.test_mode_), count_(other.count_), + stats_sum_(other.stats_sum_), stats_sumsq_(other.stats_sumsq_), + training_begining_(other.training_begining_), + r_max_(other.r_max_), d_max_(other.d_max_), average_count_(other.average_count_), + alpha_(other.alpha_), moving_mean_(other.moving_mean_), + moving_stddev_(other.moving_stddev_) { + ComputeDerived(); + Check(); +} + + +std::string BatchRenormComponent::Info() const { + std::ostringstream stream; + stream << Type() << ", dim=" << dim_ << ", block-dim=" << block_dim_ + << ", epsilon=" << epsilon_ << ", target-rms=" << target_rms_ + << ", count=" << count_ + << ", test-mode=" << (test_mode_ ? "true" : "false"); + if (count_ > 0) { + Vector mean(stats_sum_), var(stats_sumsq_); + mean.Scale(1.0 / count_); + var.Scale(1.0 / count_); + // subtract mean^2 from var. + var.AddVecVec(-1.0, mean, mean, 1.0); + var.ApplyFloor(0.0); + var.ApplyPow(0.5); // make it the stddev. + stream << ", data-mean=" << SummarizeVector(mean) + << ", data-stddev=" << SummarizeVector(var); + Vector moving_mean_copy(moving_mean_), moving_stddev_copy(moving_stddev_); + stream << ", moving-mean=" << SummarizeVector(moving_mean_copy) + << ", moving-stddv=" << SummarizeVector(moving_stddev_copy); + } + return stream.str(); +} + +void BatchRenormComponent::InitFromConfig(ConfigLine *cfl) { + dim_ = -1; + block_dim_ = -1; + epsilon_ = 1.0e-03; + target_rms_ = 1.0; + test_mode_ = false; + training_begining_ = true; + r_max_ = 1.0; + d_max_ = 0.0; + alpha_ = 0.01; + + bool ok = cfl->GetValue("dim", &dim_); + cfl->GetValue("block-dim", &block_dim_); + cfl->GetValue("epsilon", &epsilon_); + cfl->GetValue("target-rms", &target_rms_); + cfl->GetValue("test-mode", &test_mode_); + cfl->GetValue("r-max", &r_max_); + cfl->GetValue("d-max", &d_max_); + cfl->GetValue("alpha", &alpha_); + if (!ok || dim_ <= 0) { + KALDI_ERR << "BatchRenormComponent must have 'dim' specified, and > 0"; + } + if (block_dim_ == -1) + block_dim_ = dim_; + if (!(block_dim_ > 0 && dim_ % block_dim_ == 0 && + epsilon_ > 0 && target_rms_ > 0)) + KALDI_ERR << "Invalid configuration in BatchRenormComponent."; + if (cfl->HasUnusedValues()) + KALDI_ERR << "Could not process these elements in initializer: " + << cfl->UnusedValues(); + count_ = 0; + average_count_ = 1.0; + stats_sum_.Resize(block_dim_); + stats_sumsq_.Resize(block_dim_); + moving_stddev_.Resize(block_dim_); + moving_mean_.Resize(block_dim_); + if (test_mode_) { + ComputeDerived(); + } +} + + + +/* + BATCH-RENORM_MATH + + This comment describes the equations involved in batch-renorm normalization, and + derives the forward and back-propagation. + + For BatchRenorm we just set target-rms = 1.0. + + This is all dimension-by-dimension, so we just imagine the inputs + are scalars x(i), for i=0 .. n-1. 
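+  For a concrete reference alongside the derivation below, here is a small
+  NumPy sketch of the forward pass in training mode (illustration only, it is
+  not used anywhere in the code; the function name is made up):
+
+    import numpy as np
+
+    def batch_renorm_forward(x, moving_mean, moving_stddev,
+                             r_max=1.0, d_max=0.0, epsilon=1.0e-03):
+        # x has shape [num_frames, block_dim]; statistics are per column.
+        mean = x.mean(axis=0)
+        var = np.maximum((x * x).mean(axis=0) - mean * mean, 0.0)
+        rscale = (var + epsilon) ** -0.5            # power == -0.5
+        z = (x - mean) * rscale                     # plain batch-norm output
+        # correction terms; no gradient is propagated through them.
+        clipped_r = np.clip((1.0 / rscale) / moving_stddev, 1.0 / r_max, r_max)
+        clipped_d = np.clip((mean - moving_mean) / moving_stddev, -d_max, d_max)
+        return z * clipped_r + clipped_d
+
+  With the default r-max = 1.0 and d-max = 0.0 the corrections are clipped to
+  r = 1 and d = 0, and the component behaves like ordinary batch-norm.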
+
+   FORWARD PASS:
+
+   Let 'power' be a constant, equal to -0.5 for regular batch-renorm.
+
+   To simplify the math we (conceptually, not physically) do the normalization in
+   two stages:  first mean, then variance, so we have x(i) -> y(i) -> z(i).
+
+   The name 'rscale' means 'raw scale', meaning the scale before including
+   target-rms.  Later we'll define 'scale = target-rms * rscale', to make some
+   of the actual computations slightly more efficient.  Note that for
+   batch-renorm we fix target-rms = 1.0.
+
+   clipped_r and clipped_d are the correction terms of batch-renorm, clipped to
+   their allowed ranges; they are treated as constants in the backward pass
+   (back-prop is stopped through them).
+
+   Define:   mean = 1/I * sum_i x(i)
+             y(i) = x(i) - mean
+
+             var = 1/I \sum_i y(i)^2
+          rscale = clipped_r * (var + epsilon)^power   <---- For regular batch-renorm, power == -0.5.
+            z(i) = rscale * y(i) + clipped_d
+
+
+   Most of the rest of this comment derives how to compute the derivatives.  If
+   you just want the formulas, please skip to the string 'BACKWARD PASS' below.
+
+   We'll use a notation where an apostrophe on something means (the derivative of
+   the objective function w.r.t. that thing), so y'(i) is df/dy(i), and so on.
+   We are given z'(i).   Propagating the derivatives backward:
+
+   rscale' = (sum_i y(i) z'(i))
+           = (sum_i (z(i) - clipped_d) * z'(i)) / rscale
+
+   [ note: d(rscale)/d(var) = clipped_r * power * (var + epsilon)^{power - 1}
+                            = clipped_r^(1/power) * power * rscale^{(power-1)/power} ]
+
+   var' = rscale' * clipped_r^(1/power) * power * rscale^{(power-1)/power}
+        = (sum_i (z(i) - clipped_d) * z'(i)) / rscale * clipped_r^(1/power) * power * rscale^{(power-1)/power}
+        = (sum_i (z(i) - clipped_d) * z'(i)) * clipped_r^(1/power) * power * rscale^(-1/power)
+        = clipped_r^(1/power) * power * (sum_i (z(i) - clipped_d) * z'(i)) * rscale^(-1/power)
+
+   [note: the following formula is of the form "direct term" + "indirect term"]
+   y'(i) = z'(i) * rscale  +  2/I y(i) var'
+
+   Now, the above is inconvenient because it contains y(i) which is an intermediate
+   quantity.  We reformulate in terms of z(i), using y(i) = (z(i) - clipped_d) / rscale, so:
+
+   defining
+     var_deriv_mod = 2/I * var' / rscale
+                   = clipped_r^(1/power) * 2/I * power * (sum_i (z(i) - clipped_d) * z'(i)) * rscale^{-(1+power)/power}
+   we have:
+     y'(i) = z'(i) * rscale  +  2/I y(i) var'
+           = z'(i) * rscale  +  2/I (z(i) - clipped_d) / rscale * var'
+           = z'(i) * rscale  +  (z(i) - clipped_d) var_deriv_mod
+
+   Now,
+     mean' = \sum_i y'(i)
+           = rscale * \sum_i z'(i)  +  var_deriv_mod * \sum_i (z(i) - clipped_d)
+             [ ... and the 2nd term is zero because \sum_i z(i) = I * clipped_d ... ]
+           = rscale * \sum_i z'(i)
+   and:
+     x'(i) = z'(i) * rscale  +  (z(i) - clipped_d) * var_deriv_mod  -  1/I mean'
+           = z'(i) * rscale  +  (z(i) - clipped_d) * var_deriv_mod  -  1/I * rscale * \sum_i z'(i)
+           = rscale * (z'(i) - 1/I * \sum_i z'(i))  +  (z(i) - clipped_d) var_deriv_mod
+
+   Since for batch-renorm target-rms = 1.0, we have scale == rscale, and we can
+   write the result as follows:
+
+   BACKWARD PASS (recap):
+
+      var_deriv_mod = clipped_r^(1/power) * 2/I * power * (sum_i (z(i) - clipped_d) * z'(i)) * rscale^{-(1+power)/power}
+
+                   ..
which for power = -0.5, simplifies to: + var_deriv_mod = -1.0 * (clipped_r)^(-2) * (1/I \sum_i (z(i) - clipped_d) * z'(i)) * scale + + x'(i) = scale * (z'(i) - 1/I * \sum_i z'(i)) + (z(i) - clipped_d) var_deriv_mod +*/ +void* BatchRenormComponent::Propagate(const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const { + KALDI_ASSERT(SameDim(in, *out) && + (in.NumCols() == dim_ || in.NumCols() == block_dim_)); + if (in.NumCols() != block_dim_) { + // if block_dim_ != dim_, we recurse; this helps keep the main code + // simple. + KALDI_ASSERT(in.Stride() == in.NumCols() && out->Stride() == out->NumCols()); + int32 ratio = dim_ / block_dim_, orig_rows = in.NumRows(), + orig_cols = in.NumCols(), new_rows = orig_rows * ratio, + new_cols = orig_cols / ratio; + CuSubMatrix in_reshaped(in.Data(), new_rows, new_cols, new_cols), + out_reshaped(out->Data(), new_rows, new_cols, new_cols); + return Propagate(indexes, in_reshaped, &out_reshaped); + } + + // From this point, we can assume that the num-cols of 'in' and 'out' + // equals block_dim_. + + if (!test_mode_) { + // search in the comment above for FORWARD PASS to see what is being + // implemented here. + // if this takes too much time due to multiple different CUDA calls, + // we'll consider making a single kernel for some of it. + Memo *memo = new Memo; + int32 num_frames = in.NumRows(), dim = block_dim_; + memo->num_frames = num_frames; + memo->mean_uvar_scale.Resize(8, dim); + CuSubVector mean(memo->mean_uvar_scale, 0), + uvar(memo->mean_uvar_scale, 1), + scale(memo->mean_uvar_scale, 2), + clipped_r(memo->mean_uvar_scale, 5), + clipped_d(memo->mean_uvar_scale, 6); + + mean.AddRowSumMat(1.0 / num_frames, in, 0.0); + uvar.AddDiagMat2(1.0 / num_frames, in, kTrans, 0.0); + scale.CopyFromVec(uvar); + + // by applying this scale at this point, we save a multiply later on. + BaseFloat var_scale = 1.0 / (target_rms_ * target_rms_); + scale.AddVecVec(-var_scale, mean, mean, var_scale); + // at this point, 'scale' contains just the variance (times target-rms^{-2}). + scale.ApplyFloor(0.0); + scale.Add(var_scale * epsilon_); + // Now 'scale' contains the variance floored to zero and then with epsilon + // added [both times 1/target-rms^2]. + scale.ApplyPow(-0.5); + // now 'scale' is the actual scale we'll use. + + // the next command will do no work if out == in, for in-place propagation. + out->CopyFromMat(in); + out->AddVecToRows(-1.0, mean, 1.0); + out->MulColsVec(scale); + + if (!training_begining_) { + // update clipped update + CuVector moving_mean_copy(moving_mean_), moving_stddev_copy(moving_stddev_); + clipped_d.CopyFromVec(mean); + clipped_d.AddVec(-1.0, moving_mean_copy); + moving_stddev_copy.ApplyPow(-1.0); + clipped_d.MulElements(moving_stddev_copy); + + CuVector stddv_tmpt(scale); + stddv_tmpt.ApplyPow(-1.0); + clipped_r.CopyFromVec(stddv_tmpt); + clipped_r.MulElements(moving_stddev_copy); + + clipped_r.ApplyCeiling(r_max_); + clipped_r.ApplyFloor(1.0 / r_max_); + clipped_d.ApplyCeiling(d_max_); + clipped_d.ApplyFloor(- d_max_); + + out->MulColsVec(clipped_r); + out->AddVecToRows(1.0, clipped_d, 1.0); + } else { + clipped_r.Set(1); + clipped_d.Set(0); + } + return static_cast(memo); + } else { + if (offset_.Dim() != block_dim_) { + if (count_ == 0) + KALDI_ERR << "Test mode set in BatchRenormComponent, but no stats."; + else // why was ComputeDerived() not called? 
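+      // For reference, the training-time (non-test-mode) branch above as a
+      // minimal scalar sketch in plain C++ (names and values are ad hoc and
+      // illustrative, not members of this class):
+      //
+      //   std::vector<double> x = {0.5, 2.5, 1.0};     // one dimension of the minibatch
+      //   double epsilon = 1.0e-03, moving_mean = 1.0, moving_stddev = 1.0,
+      //       r_max = 3.0, d_max = 5.0;
+      //   double mean = 0.0, uvar = 0.0;
+      //   for (double v : x) { mean += v; uvar += v * v; }
+      //   mean /= x.size();  uvar /= x.size();
+      //   double stddev = std::sqrt(std::max(uvar - mean * mean, 0.0) + epsilon),
+      //       r = std::min(std::max(stddev / moving_stddev, 1.0 / r_max), r_max),
+      //       d = std::min(std::max((mean - moving_mean) / moving_stddev, -d_max), d_max);
+      //   for (double &v : x) v = ((v - mean) / stddev) * r + d;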
+ KALDI_ERR << "Code error in BatchRenormComponent"; + } + out->CopyFromMat(in); + out->MulColsVec(scale_); + out->AddVecToRows(1.0, offset_, 1.0); + return NULL; + } +} + +void BatchRenormComponent::Backprop( + const std::string &debug_info, + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in_value, // unused + const CuMatrixBase &out_value, + const CuMatrixBase &out_deriv, + void *memo_in, + Component *to_update, // unused + CuMatrixBase *in_deriv) const { + + KALDI_ASSERT(SameDim(out_value, out_deriv) && + SameDim(out_value, *in_deriv) && + (out_value.NumCols() == dim_ || + out_value.NumCols() == block_dim_)); + if (out_value.NumCols() != block_dim_) { + // if block_dim_ != dim_, we recurse; this helps keep the main code + // simple. + KALDI_ASSERT(out_value.Stride() == out_value.NumCols() && + out_deriv.Stride() == out_deriv.NumCols() && + in_deriv->Stride() == in_deriv->NumCols()); + int32 ratio = dim_ / block_dim_, + orig_rows = out_value.NumRows(), + orig_cols = out_value.NumCols(), + new_rows = orig_rows * ratio, new_cols = orig_cols / ratio; + CuSubMatrix out_value_reshaped(out_value.Data(), new_rows, + new_cols, new_cols), + out_deriv_reshaped(out_deriv.Data(), new_rows, new_cols, new_cols), + in_deriv_reshaped(in_deriv->Data(), new_rows, new_cols, new_cols); + // we'll never use in_value, so pass it in unchanged. + Backprop(debug_info, indexes, in_value, + out_value_reshaped, out_deriv_reshaped, + memo_in, to_update, &in_deriv_reshaped); + return; + } + + Memo *memo = static_cast(memo_in); + + if (!test_mode_) { + // search above for BACKWARD PASS for a comment describing the math. + KALDI_ASSERT(memo != NULL && "memo not passed into backprop"); + int32 num_frames = memo->num_frames; + KALDI_ASSERT(out_value.NumRows() == num_frames); + CuSubVector + scale(memo->mean_uvar_scale, 2), + var_deriv_mod(memo->mean_uvar_scale, 3), + temp(memo->mean_uvar_scale, 4), + clipped_r(memo->mean_uvar_scale, 5), + clipped_d(memo->mean_uvar_scale, 6); + + // rscale == clipped_r * (var + epsilon)^power == clipped_r * scale_ !! this scale_ is from memo + // and target-rms = 1.0, so scale = rscale + scale.MulElements(clipped_r); + + // var_deriv_mod is going to contain: + // -1.0 * (clipped_r)^(-2) * (1/I \sum_i (z(i) - clipped_d) * z'(i)) * scale + // but for now we don't have the power of 'scale', we'll add that later. + BaseFloat coeff = -1.0 / num_frames; + // z(i) - clipped_d + CuMatrix out_value_minus_clipped_d(out_value); + out_value_minus_clipped_d.AddVecToRows(-1.0, clipped_d, 1.0); + + // -1.0 * (1/I \sum_i (z(i) - clipped_d) * z'(i)) + var_deriv_mod.AddDiagMatMat(coeff, out_value_minus_clipped_d, kTrans, + out_deriv, kNoTrans, 0.0); + // -1.0 * (1/I \sum_i (z(i) - clipped_d) * z'(i)) * scale + var_deriv_mod.MulElements(scale); + // -1.0 * (clipped_r)^(-2) * (1/I \sum_i (z(i) - clipped_d) * z'(i)) * scale + clipped_r.ApplyPow(-2.0); + var_deriv_mod.MulElements(clipped_r); + clipped_r.ApplyPow(-0.5); + + temp.AddRowSumMat(-1.0 / num_frames, out_deriv, 0.0); + // the following statement does no work if in_deriv and out_deriv are the + // same matrix. 
+ in_deriv->CopyFromMat(out_deriv); + in_deriv->AddVecToRows(1.0, temp); + // At this point, *in_deriv contains + // (z'(i) - 1/I * \sum_i z'(i)) + in_deriv->MulColsVec(scale); + // At this point, *in_deriv contains + // scale * (z'(i) - 1/I * \sum_i z'(i)) + + in_deriv->AddMatDiagVec(1.0, out_value_minus_clipped_d, kNoTrans, + var_deriv_mod, 1.0); + + // At this point, *in_deriv contains what we described in the comment + // starting BATCHNORM_MATH as: + // x'(i) = scale * (z'(i) - 1/I * \sum_i z'(i)) + (z(i) - clipped_d) var_deriv_mod + + // to scale the memo scale back to its original value + scale.DivElements(clipped_r); + } else { + KALDI_ASSERT(offset_.Dim() == block_dim_); + // the next call does no work if they point to the same memory. + in_deriv->CopyFromMat(out_deriv); + in_deriv->MulColsVec(scale_); + } +} + +void BatchRenormComponent::StoreStats( + const CuMatrixBase &in_value, + const CuMatrixBase &out_value, + void *memo_in) { + // in test mode this component does not store stats, it doesn't provide the + // kStoresStats flag. + KALDI_ASSERT(!test_mode_); + KALDI_ASSERT(out_value.NumCols() == dim_ || out_value.NumCols() == block_dim_); + if (out_value.NumCols() != block_dim_) { + // if block_dim_ != dim_, we recurse; this helps keep the main code + // simple. + KALDI_ASSERT(out_value.Stride() == out_value.NumCols()); + int32 ratio = dim_ / block_dim_, + orig_rows = out_value.NumRows(), + orig_cols = out_value.NumCols(), + new_rows = orig_rows * ratio, new_cols = orig_cols / ratio; + CuSubMatrix out_value_reshaped(out_value.Data(), new_rows, + new_cols, new_cols); + // we'll never use in_value, so just pass it in unchanged. + StoreStats(in_value, out_value_reshaped, memo_in); + return; + } + + Memo *memo = static_cast(memo_in); + KALDI_ASSERT(out_value.NumRows() == memo->num_frames); + + CuSubVector mean(memo->mean_uvar_scale, 0), + uvar(memo->mean_uvar_scale, 1), + scale(memo->mean_uvar_scale, 2); + KALDI_ASSERT(mean.Dim() == block_dim_ && memo->num_frames > 0); + BaseFloat num_frames = memo->num_frames; + if (stats_sum_.Dim() != block_dim_) { + stats_sum_.Resize(block_dim_); + stats_sumsq_.Resize(block_dim_); + moving_mean_.Resize(block_dim_); + moving_stddev_.Resize(block_dim_); + KALDI_ASSERT(count_ == 0); + } + count_ += num_frames; + stats_sum_.AddVec(num_frames, mean, 1.0); + stats_sumsq_.AddVec(num_frames, uvar, 1.0); + if (training_begining_) { + training_begining_ = false; + moving_mean_.CopyFromVec(mean); + scale.ApplyPow(-1.0); + moving_stddev_.CopyFromVec(scale); + scale.ApplyPow(-1.0); + } else { + BaseFloat alpha_2 = 1.0 - alpha_; + + moving_mean_.Scale(alpha_2); + moving_mean_.AddVec(alpha_, mean); + + scale.ApplyPow(-1.0); + moving_stddev_.Scale(alpha_2); + moving_stddev_.AddVec(alpha_, scale); + scale.ApplyPow(-1.0); + } +} + +void BatchRenormComponent::Read(std::istream &is, bool binary) { + ExpectOneOrTwoTokens(is, binary, "", ""); + ReadBasicType(is, binary, &dim_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &block_dim_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &epsilon_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &target_rms_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &test_mode_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &count_); + ExpectToken(is, binary, ""); + stats_sum_.Read(is, binary); + ExpectToken(is, binary, ""); + stats_sumsq_.Read(is, binary); + stats_sumsq_.AddVecVec(1.0, stats_sum_, stats_sum_, 1.0); + stats_sum_.Scale(count_); + 
stats_sumsq_.Scale(count_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &training_begining_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &r_max_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &d_max_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &alpha_); + ExpectToken(is, binary, ""); + moving_mean_.Read(is, binary); + ExpectToken(is, binary, ""); + moving_stddev_.Read(is, binary); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &average_count_); + ExpectToken(is, binary, ""); + ComputeDerived(); + Check(); +} + +void BatchRenormComponent::Write(std::ostream &os, bool binary) const { + Check(); + WriteToken(os, binary, ""); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, dim_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, block_dim_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, epsilon_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, target_rms_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, test_mode_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, count_); + CuVector mean(stats_sum_), var(stats_sumsq_); + if (count_ != 0) { + mean.Scale(1.0 / count_); + var.Scale(1.0 / count_); + var.AddVecVec(-1.0, mean, mean, 1.0); + } + WriteToken(os, binary, ""); + mean.Write(os, binary); + WriteToken(os, binary, ""); + var.Write(os, binary); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, training_begining_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, r_max_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, d_max_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, alpha_); + WriteToken(os, binary, ""); + moving_mean_.Write(os, binary); + WriteToken(os, binary, ""); + moving_stddev_.Write(os, binary); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, average_count_); + WriteToken(os, binary, ""); +} + +void BatchRenormComponent::Scale_Training(BaseFloat scale) { + KALDI_WARN << "Scale during training : "<< scale; + if (scale == 0) { + count_ = 0.0; + average_count_ = 0.0; + stats_sum_.SetZero(); + stats_sumsq_.SetZero(); + moving_mean_.SetZero(); + moving_stddev_.SetZero(); + } else { + count_ *= scale; + stats_sum_.Scale(scale); + stats_sumsq_.Scale(scale); + } +} + +void BatchRenormComponent::Scale(BaseFloat scale) { + KALDI_WARN << "Scale during averaging : " << scale; + if (scale == 0) { + count_ = 0.0; + average_count_ = 0.0; + stats_sum_.SetZero(); + stats_sumsq_.SetZero(); + moving_mean_.SetZero(); + moving_stddev_.SetZero(); + } else { + count_ *= scale; + average_count_ *= scale; + stats_sum_.Scale(scale); + stats_sumsq_.Scale(scale); + moving_mean_.Scale(scale); + moving_stddev_.Scale(scale); + } +} + +void BatchRenormComponent::Add(BaseFloat alpha, const Component &other_in) { + const BatchRenormComponent *other = + dynamic_cast(&other_in); + count_ += alpha * other->count_; + stats_sum_.AddVec(alpha, other->stats_sum_); + stats_sumsq_.AddVec(alpha, other->stats_sumsq_); + + KALDI_WARN << "Average_count : " << average_count_; + KALDI_WARN << "Other Average_count : " << other->average_count_; + KALDI_WARN << "Add alpha scale : "<< alpha; + double average_count_copy(average_count_); + CuVector moving_mean_copy(moving_mean_), moving_stddev_copy(moving_stddev_); + KALDI_WARN << "Moving mean copy : " << SummarizeVector(moving_mean_copy); + KALDI_WARN << "Moving stddev copy : "<< SummarizeVector(moving_stddev_copy); + average_count_ += alpha * other->average_count_; + 
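+  // Sketch of the intended effect of the lines below for the two-model case,
+  // assuming *this was already scaled by its weight w0 (as done via
+  // ScaleNnetForAverage() in nnet3-average.cc) and 'other' carries weight w1:
+  //   moving_mean <-- (w0 * moving_mean_0 + w1 * moving_mean_1) / (w0 + w1),
+  // and likewise for moving_stddev; average_count_ temporarily accumulates the
+  // total weight and is restored afterwards.  See the TODO in the header about
+  // making this more robust.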
moving_mean_.AddVec(alpha, other->moving_mean_); + moving_stddev_.AddVec(alpha, other->moving_stddev_); + moving_mean_.Scale(1.0 / average_count_); + moving_stddev_.Scale(1.0 / average_count_); + KALDI_WARN << "Moving mean copy after: " << SummarizeVector(moving_mean_copy); + KALDI_WARN << "Moving stddev copy after: "<< SummarizeVector(moving_stddev_copy); + average_count_ = average_count_copy; + // this operation might change offset_ and scale_, so we recompute them + // in this instance (but not in Scale()). + ComputeDerived(); +} + +void BatchRenormComponent::ZeroStats() { + // We only zero the stats if we're not in test mode. In test mode, this would + // be dangerous as the stats are the source for the transform, and zeroing + // them and then calling ComputeDerived() again would remove the transform + // parameters (offset_ and scale_). + if (!test_mode_) { + count_ = 0.0; + average_count_ = 1.0; + stats_sum_.SetZero(); + stats_sumsq_.SetZero(); + } +} + +void BatchRenormComponent::SetBatchRenormCorrections(BaseFloat r_max, BaseFloat d_max) { + r_max_ = r_max; + d_max_ = d_max; + } } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-normalize-component.h b/src/nnet3/nnet-normalize-component.h index 37ad624d0f0..d17b3bf2ea3 100644 --- a/src/nnet3/nnet-normalize-component.h +++ b/src/nnet3/nnet-normalize-component.h @@ -7,6 +7,7 @@ // 2014-2015 Guoguo Chen // 2015 Daniel Galvez // 2015 Tom Ko +// 2018 Gaofeng Cheng (Institute of Acoustics, Chinese Academy of Sciences) // See ../../COPYING for clarification regarding multiple authors // @@ -223,6 +224,7 @@ class BatchNormComponent: public Component { // Note: the offset and scale will only be nonempty in 'test mode'. const CuVector &Offset() const { return offset_; } const CuVector &Scale() const { return scale_; } + bool IsBatchRenorm() const { return false; } private: @@ -294,6 +296,195 @@ class BatchNormComponent: public Component { CuVector scale_; }; +/* + BatchRenormComponent + + This implements batch-renorm normalization; See details in : https://arxiv.org/abs/1702.03275 + + If you want to combine this with the trainable offset and scale that the + original BatchNorm paper used, then follow this by the + ScaleAndOffsetComponent. + + It's a simple component (uses the kSimpleComponent flag), but it is unusual in + that it will give different results if you call it on half the matrix at a + time. Most of the time this would be pretty harmless, so we still return the + kSimpleComponent flag. We may have to modify the test code a little to + account for this, or possibly remove the kSimpleComponent flag. In some sense + each output Index depends on every input Index, but putting those dependencies + explicitly into the dependency-tracking framework as a GeneralComponent + would be very impractical and might lead to a lot of unnecessary things being + computed. You have to be a bit careful where you put this component, and understand + what you're doing e.g. putting it in the path of a recurrence is a bit problematic + if the minibatch size is small. + + Accepted configuration values: + dim Dimension of the input and output + block-dim Defaults to 'dim', but may be set to a divisor + of 'dim'. In this case, each block of dimension 'block-dim' + is treated like a separate row of the input matrix, which + means that the stats from n'th element of each + block are pooled into one class, for each n. 
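+                   For example, with dim=200 and block-dim=50 the stats are
+                   pooled over 4 blocks of dimension 50, which is how spatial
+                   batch normalization is done for convolutional setups (see
+                   nnet-convolutional-component.h).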
+ epsilon Small term added to the variance that is used to prevent + division by zero + target-rms This defaults to 1.0, but if set, for instance, to 2.0, + it will normalize the standard deviation of the output to + 2.0. 'target-stddev' might be a more suitable name, but this + was chosen for consistency with NormalizeComponent. + alpha This is the decay-momentum used for the moving-averages, + see details in : https://arxiv.org/abs/1702.03275 + */ +class BatchRenormComponent: public Component { + public: + + BatchRenormComponent() { } + + // call this with 'true' to set 'test mode' where the batch normalization is + // done with stored stats. There won't normally be any need to specially + // accumulate these stats; they are stored as a matter of course on each + // iteration of training, as for NonlinearComponents, and we'll use the stats + // from the most recent [script-level] iteration. + // (Note: it will refuse to actually set test-mode to true if there + // are no stats stored.) + void SetTestMode(bool test_mode); + + // constructor using another component + BatchRenormComponent(const BatchRenormComponent &other); + + virtual int32 InputDim() const { return dim_; } + virtual int32 OutputDim() const { return dim_; } + + virtual std::string Info() const; + virtual void InitFromConfig(ConfigLine *cfl); + virtual std::string Type() const { return "BatchRenormComponent"; } + virtual int32 Properties() const { + // If the block-dim is less than the dim, we need the input and output + // matrices to be contiguous (stride==num-cols), as we'll be reshaping + // internally. This is not much of a cost, because this will be used + // in convnets where we have to do this anyway. + return kSimpleComponent|kBackpropNeedsOutput|kPropagateInPlace| + kBackpropInPlace| + (block_dim_ < dim_ ? kInputContiguous|kOutputContiguous : 0)| + (test_mode_ ? 0 : kUsesMemo|kStoresStats); + } + virtual void* Propagate(const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const; + virtual void Backprop(const std::string &debug_info, + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in_value, + const CuMatrixBase &out_value, + const CuMatrixBase &out_deriv, + void *memo, + Component *, // to_update, + CuMatrixBase *in_deriv) const; + + virtual void Read(std::istream &is, bool binary); // This Read function + // requires that the Component has the correct type. + + /// Write component to stream + virtual void Write(std::ostream &os, bool binary) const; + virtual Component* Copy() const { return new BatchRenormComponent(*this); } + + virtual void Scale(BaseFloat scale); + virtual void Scale_Training(BaseFloat scale); + virtual void Add(BaseFloat alpha, const Component &other); + virtual void ZeroStats(); + + + virtual void DeleteMemo(void *memo) const { delete static_cast(memo); } + + virtual void StoreStats(const CuMatrixBase &in_value, + const CuMatrixBase &out_value, + void *memo); + virtual void SetBatchRenormCorrections(BaseFloat r_max, BaseFloat d_max); + + // Members specific to this component type. + // Note: the offset and scale will only be nonempty in 'test mode'. + const CuVector &Offset() const { return offset_; } + const CuVector &Scale() const { return scale_; } + bool IsBatchRenorm() const { return true; } + + private: + + struct Memo { + // number of frames (after any reshaping). 
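+    // For reference, a config line accepted by InitFromConfig() might look
+    // like the following (the component name and all values are illustrative):
+    //
+    //   component name=renorm1 type=BatchRenormComponent dim=625 \
+    //     epsilon=1e-03 r-max=3.0 d-max=5.0 alpha=0.01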
+ int32 num_frames; + // 'sum_sumsq_scale' is of dimension 5 by block_dim_: + // Row 0 = mean = the mean of the rows of the input + // Row 1 = uvar = the uncentered variance of the input (= sumsq / num_frames). + // Row 2 = scale = the scale of the renormalization. + // Rows 3 and 4 are used as temporaries in Backprop. + CuMatrix mean_uvar_scale; + }; + + void Check() const; + + // this function is used in a couple of places; it turns the raw stats into + // the offset/scale term of a normalizing transform. + static void ComputeOffsetAndScale(double count, + BaseFloat epsilon, + const Vector &stats_sum, + const Vector &stats_sumsq, + Vector *offset, + Vector *scale); + // computes derived parameters offset_ and scale_. + void ComputeDerived(); + + // Dimension of the input and output. + int32 dim_; + // This would normally be the same as dim_, but if it's less (and it must be > + // 0 and must divide dim_), then each separate block of the input of dimension + // 'block_dim_' is treated like a separate frame for the purposes of + // normalization. This can be used to implement spatial batch normalization + // for convolutional setups-- assuming the filter-dim has stride 1, which it + // always will in the new code in nnet-convolutional-component.h. + int32 block_dim_; + + // Used to avoid exact-zero variances, epsilon has the dimension of a + // covariance. + BaseFloat epsilon_; + + // This value will normally be 1.0, which is the default, but you can set it + // to other values as a way to control how fast the following layer learns + // (smaller -> slower). The same config exists in NormalizeComponent. + BaseFloat target_rms_; + + // This is true if we want the batch normalization to operate in 'test mode' + // meaning the data mean and stddev used for the normalization are fixed + // quantities based on previously accumulated stats. Note: the stats we use + // for this are based on the same 'StoreStats' mechanism as we use for + // components like SigmoidComponent and ReluComponent; we'll be using + // the stats from the most recent [script-level] iteration of training. + bool test_mode_; + + double average_count_; + // total count of stats stored by StoreStats(). + double count_; + // sum-of-data component of stats of input data. + CuVector stats_sum_; + // sum-of-squared component of stats of input data. + CuVector stats_sumsq_; + + // + bool training_begining_; + + // the maximum allowed correction for batch renorm. + // The correction `(r, d)` is used as `corrected_value = normalized_value * r + d`, + // with `r` clipped to [1 / rmax, rmax], and `d` to [-dmax, dmax]. + BaseFloat r_max_; + BaseFloat d_max_; + + // decay-factor for the moving averages. 
+ BaseFloat alpha_; + + CuVector moving_mean_; + CuVector moving_stddev_; + + // [TODO] : a more robust method to generate scale_ and offset_ for inference + // temporarily, offset_ and scale_ is picked from one of the parallel models + CuVector offset_; + CuVector scale_; +}; } // namespace nnet3 diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index e020f8fc6a7..d8002fe3d42 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -301,6 +301,22 @@ void SetNnetAsGradient(Nnet *nnet) { } void ScaleNnet(BaseFloat scale, Nnet *nnet) { + if (scale == 1.0) return; + else { + for (int32 c = 0; c < nnet->NumComponents(); c++) { + Component *comp = nnet->GetComponent(c); + BatchRenormComponent *bc = dynamic_cast(comp); + if (bc != NULL) { + bc->Scale_Training(scale); + } else { + Component *comp = nnet->GetComponent(c); + comp->Scale(scale); + } + } + } +} + +void ScaleNnetForAverage(BaseFloat scale, Nnet *nnet) { if (scale == 1.0) return; else { for (int32 c = 0; c < nnet->NumComponents(); c++) { @@ -520,6 +536,9 @@ bool HasBatchnorm(const Nnet &nnet) { const Component *comp = nnet.GetComponent(c); if (dynamic_cast(comp) != NULL) return true; + comp = nnet.GetComponent(c); + if (dynamic_cast(comp) != NULL) + return true; } return false; } @@ -532,8 +551,15 @@ void ScaleBatchnormStats(BaseFloat batchnorm_stats_scale, for (int32 c = 0; c < nnet->NumComponents(); c++) { Component *comp = nnet->GetComponent(c); BatchNormComponent *bc = dynamic_cast(comp); - if (bc != NULL) + if (bc != NULL) { bc->Scale(batchnorm_stats_scale); + } else { + comp = nnet->GetComponent(c); + BatchRenormComponent *bc = dynamic_cast(comp); + if (bc != NULL) { + bc->Scale_Training(batchnorm_stats_scale); + } + } } } @@ -556,8 +582,15 @@ void SetBatchnormTestMode(bool test_mode, Nnet *nnet) { for (int32 c = 0; c < nnet->NumComponents(); c++) { Component *comp = nnet->GetComponent(c); BatchNormComponent *bc = dynamic_cast(comp); - if (bc != NULL) + if (bc != NULL) { bc->SetTestMode(test_mode); + } else { + comp = nnet->GetComponent(c); + BatchRenormComponent *bc = dynamic_cast(comp); + if (bc != NULL) { + bc->SetTestMode(test_mode); + } + } } } @@ -1641,18 +1674,32 @@ class ModelCollapser { const BatchNormComponent *batchnorm_component = dynamic_cast( nnet_->GetComponent(component_index1)); - if (batchnorm_component == NULL) + const BatchRenormComponent *batchrenorm_component = + dynamic_cast( + nnet_->GetComponent(component_index1)); + if (batchnorm_component != NULL && batchrenorm_component != NULL) { + KALDI_ERR << "Something seems very wrong, a component belongs to both batch-norm and batch-renorm ?"; + } else if (batchnorm_component == NULL && batchrenorm_component == NULL) { return -1; - - if (batchnorm_component->Offset().Dim() == 0) { - KALDI_ERR << "Expected batch-norm components to have test-mode set."; + } else if (batchnorm_component != NULL) { + if (batchnorm_component->Offset().Dim() == 0) { + KALDI_ERR << "Expected batch-norm components to have test-mode set."; + } + std::string batchnorm_component_name = nnet_->GetComponentName(component_index1); + return GetDiagonallyPreModifiedComponentIndex(batchnorm_component->Offset(), + batchnorm_component->Scale(), + batchnorm_component_name, + component_index2); + } else { + if (batchrenorm_component->Offset().Dim() == 0) { + KALDI_ERR << "Expected batch-norm components to have test-mode set."; + } + std::string batchrenorm_component_name = nnet_->GetComponentName(component_index1); + return 
GetDiagonallyPreModifiedComponentIndex(batchrenorm_component->Offset(), + batchrenorm_component->Scale(), + batchrenorm_component_name, + component_index2); } - std::string batchnorm_component_name = nnet_->GetComponentName( - component_index1); - return GetDiagonallyPreModifiedComponentIndex(batchnorm_component->Offset(), - batchnorm_component->Scale(), - batchnorm_component_name, - component_index2); } diff --git a/src/nnet3bin/nnet3-average.cc b/src/nnet3bin/nnet3-average.cc index d794e37e50d..face910a654 100644 --- a/src/nnet3bin/nnet3-average.cc +++ b/src/nnet3bin/nnet3-average.cc @@ -61,7 +61,7 @@ void ReadModels(std::vector > models_and_weigh try { int32 n = models_and_weights.size(); ReadKaldiObject(models_and_weights[0].first, output_nnet); - ScaleNnet(models_and_weights[0].second, output_nnet); + ScaleNnetForAverage(models_and_weights[0].second, output_nnet); for (int32 i = 1; i < n; i++) { Nnet nnet; ReadKaldiObject(models_and_weights[i].first, &nnet); diff --git a/src/nnet3bin/nnet3-combine.cc b/src/nnet3bin/nnet3-combine.cc index 4bcf4cdfb6d..1050148fbd6 100644 --- a/src/nnet3bin/nnet3-combine.cc +++ b/src/nnet3bin/nnet3-combine.cc @@ -66,7 +66,7 @@ double ComputeObjf(bool batchnorm_test_mode, bool dropout_test_mode, void UpdateNnetMovingAverage(int32 num_models, const Nnet &nnet, Nnet *moving_average_nnet) { KALDI_ASSERT(NumParameters(nnet) == NumParameters(*moving_average_nnet)); - ScaleNnet((num_models - 1.0) / num_models, moving_average_nnet); + ScaleNnetForAverage((num_models - 1.0) / num_models, moving_average_nnet); AddNnet(nnet, 1.0 / num_models, moving_average_nnet); } @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) { "if the number of models provided to this binary is quite " "large (e.g. several hundred)."); po.Register("batchnorm-test-mode", &batchnorm_test_mode, - "If true, set test-mode to true on any BatchNormComponents " + "If true, set test-mode to true on any BatchNormComponents or BatchRenormComponents" "while evaluating objectives."); po.Register("dropout-test-mode", &dropout_test_mode, "If true, set test-mode to true on any DropoutComponents and " diff --git a/src/online2/online-gmm-decodable.h b/src/online2/online-gmm-decodable.h index c037ad0efe4..1a1d37ba2a2 100644 --- a/src/online2/online-gmm-decodable.h +++ b/src/online2/online-gmm-decodable.h @@ -24,8 +24,10 @@ #define KALDI_ONLINE2_ONLINE_GMM_DECODABLE_H_ #include "itf/online-feature-itf.h" -#include "gmm/decodable-am-diag-gmm.h" #include "matrix/matrix-lib.h" +#include "itf/decodable-itf.h" +#include "gmm/am-diag-gmm.h" +#include "hmm/transition-model.h" namespace kaldi { @@ -37,20 +39,20 @@ class DecodableDiagGmmScaledOnline : public DecodableInterface { const BaseFloat scale, OnlineFeatureInterface *input_feats); - + /// Returns the scaled log likelihood virtual BaseFloat LogLikelihood(int32 frame, int32 index); - + virtual bool IsLastFrame(int32 frame) const; - virtual int32 NumFramesReady() const; - + virtual int32 NumFramesReady() const; + /// Indices are one-based! This is for compatibility with OpenFst. virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); } private: void CacheFrame(int32 frame); - + OnlineFeatureInterface *features_; const AmDiagGmm &ac_model_; BaseFloat ac_scale_; diff --git a/src/online2/online-speex-wrapper.cc b/src/online2/online-speex-wrapper.cc index 0af5bd90bd0..e41a812ca32 100644 --- a/src/online2/online-speex-wrapper.cc +++ b/src/online2/online-speex-wrapper.cc @@ -18,7 +18,7 @@ // limitations under the License. 
#include -#include "online-speex-wrapper.h" +#include "online2/online-speex-wrapper.h" namespace kaldi { diff --git a/src/online2/onlinebin-util.cc b/src/online2/onlinebin-util.cc index 74c594eeb79..f143ebbc5f7 100644 --- a/src/online2/onlinebin-util.cc +++ b/src/online2/onlinebin-util.cc @@ -20,7 +20,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#include "onlinebin-util.h" +#include "online2/onlinebin-util.h" namespace kaldi { diff --git a/src/tree/tree-renderer.cc b/src/tree/tree-renderer.cc index cbc2ce05677..bbaa5cda162 100644 --- a/src/tree/tree-renderer.cc +++ b/src/tree/tree-renderer.cc @@ -19,6 +19,8 @@ #include "tree/tree-renderer.h" +#include "tree/context-dep.h" + namespace kaldi { const int32 TreeRenderer::kEdgeWidth = 1; const int32 TreeRenderer::kEdgeWidthQuery = 3; diff --git a/src/tree/tree-renderer.h b/src/tree/tree-renderer.h index 5e0b0d89198..78f4b9aa403 100644 --- a/src/tree/tree-renderer.h +++ b/src/tree/tree-renderer.h @@ -23,7 +23,6 @@ #include "base/kaldi-common.h" #include "tree/event-map.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" #include "fst/fstlib.h" namespace kaldi { diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index 52332dfed05..1b63c4c99d9 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -66,7 +66,7 @@ if ! echo "#include " | $CXX -E - >&/dev/null; then add_packages zlib-devel zlib1g-dev zlib-devel fi -for f in make automake autoconf patch grep bzip2 gzip wget git sox; do +for f in make automake autoconf patch grep bzip2 gzip unzip wget git sox; do if ! which $f >&/dev/null; then echo "$0: $f is not installed." add_packages $f $f $f