diff --git a/egs/tunisian_msa/s5/README b/egs/tunisian_msa/s5/README
new file mode 100644
index 00000000000..ae2aa2bc452
--- /dev/null
+++ b/egs/tunisian_msa/s5/README
@@ -0,0 +1,24 @@
+A Kaldi recipe for Arabic using the Tunisian_MSA corpus.
+
+Extra Requirements:
+This recipe uses the QCRI lexicon, which is in the Buckwalter encoding.
+In order to convert Buckwalter to UTF-8, the Encode::Arabic::Buckwalter Perl module is required.
+On Ubuntu, install the package libencode-arabic-perl.
+On Mac OS X, use cpanm (cpanminus) to install the Perl module.
+
+Description of the Tunisian_MSA Corpus
+The Tunisian_MSA corpus was originally collected to train acoustic models for pronunciation modeling in Arabic language learning applications.
+The data collection took place in 2003 at the Military Academy of Fondouk Jedied, near Tunis, the capital of the Republic of Tunisia.
+The Tunisian_MSA corpus is divided into recited and prompted speech subcorpora.
+The recited speech appears under the recordings directory and the prompted speech under the answers directory.
+Each of the 118 informants contributed to both subcorpora by reciting sentences and providing answers to prompted questions.
+The Tunisian_MSA corpus has 11.2 hours of speech.
+
+With the exception of speech from two speakers, the entire corpus was used for training.
+
+A small corpus was collected for testing.
+
+A pronunciation dictionary is also available from openslr.org.
+It covers all the words uttered in the Tunisian_MSA corpus and the test corpus.
+The QCRI lexicon was used as a starting point for writing this lexicon.
+The phones are the same as those used in the QCRI lexicon.
diff --git a/egs/tunisian_msa/s5/cmd.sh b/egs/tunisian_msa/s5/cmd.sh
new file mode 100644
index 00000000000..71dd849a93b
--- /dev/null
+++ b/egs/tunisian_msa/s5/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G"
+export decode_cmd="queue.pl --mem 4G"
+export mkgraph_cmd="queue.pl --mem 8G"
diff --git a/egs/tunisian_msa/s5/conf/mfcc.conf b/egs/tunisian_msa/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/tunisian_msa/s5/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/tunisian_msa/s5/conf/mfcc_hires.conf b/egs/tunisian_msa/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..434834a6725
--- /dev/null
+++ b/egs/tunisian_msa/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
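+# (40 cepstra over 40 mel bins below; the chain TDNN in
+# local/chain/tuning/run_tdnn_1a.sh declares its input as dim=40 to match.)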
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
+ # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/tunisian_msa/s5/conf/online_cmvn.conf b/egs/tunisian_msa/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/tunisian_msa/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/tunisian_msa/s5/conf/pitch.conf b/egs/tunisian_msa/s5/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/tunisian_msa/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/tunisian_msa/s5/conf/plp.conf b/egs/tunisian_msa/s5/conf/plp.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/tunisian_msa/s5/conf/plp.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/tunisian_msa/s5/local/answers_make_lists.pl b/egs/tunisian_msa/s5/local/answers_make_lists.pl
new file mode 100755
index 00000000000..55ee5751d9b
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/answers_make_lists.pl
@@ -0,0 +1,77 @@
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# answers_make_lists.pl - make acoustic model training lists
+
+use strict;
+use warnings;
+use Carp;
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+my $tmpdir = 'data/local/tmp/tunis';
+
+system "mkdir -p $tmpdir/answers";
+
+# input wav file list
+my $wav_list = "$tmpdir/answers_wav.txt";
+
+# output temporary wav.scp files
+my $wav_scp = "$tmpdir/answers/wav.scp";
+
+# output temporary utt2spk files
+my $u = "$tmpdir/answers/utt2spk";
+
+# output temporary text files
+my $t = "$tmpdir/answers/text";
+
+# initialize hash for prompts
+my %prompt = ();
+
+# store prompts in hash
+LINEA: while ( my $line = <> ) {
+    chomp $line;
+    my ($num,$sent) = split /\t/sxm, $line, 2;
+
+    my ($machine,$s,$mode,$language,$i) = split /\_/sxm, $num;
+    # the utterance name
+    my $utt = $machine . '_' . $s . '_' . 'a' . '_' . $i;
+    $prompt{$utt} = $sent;
+}
+
+# Write wav.scp, utt2spk and text files.
+open my $W, '<', $wav_list or croak "problem with $wav_list $!";
+open my $O, '+>', $wav_scp or croak "problem with $wav_scp $!";
+open my $U, '+>', $u or croak "problem with $u";
+open my $T, '+>', $t or croak "problem with $t";
+
+LINE: while ( my $line = <$W> ) {
+    chomp $line;
+    next LINE if ( $line !~ /Answers/sxm );
+    next LINE if ( $line =~ /Recordings/sxm );
+    my ($volume,$directories,$file) = File::Spec->splitpath( $line );
+    my @dirs = split /\//sxm, $directories;
+    my $r = basename $line, '.wav';
+    my $machine = $dirs[-3];
+    my $s = $dirs[-1];
+    my $rid = $machine . '_' . $s . '_' . 'a' . '_' . 
$r; + if ( exists $prompt{$rid} ) { + print ${T} "$rid\t$prompt{$rid}\n" or croak; + } elsif ( defined $rid ) { + print STDERR "problem\t$rid" or croak; + next LINE; + } else { + croak "$line"; + } + + print ${O} "$rid sox $line -t wav - |\n" or croak; + print ${U} "$rid ${machine}_${s}_a\n" or croak; +} +close $U or croak; +close $T or croak; +close $W or croak; +close $O or croak; diff --git a/egs/tunisian_msa/s5/local/buckwalter2unicode.py b/egs/tunisian_msa/s5/local/buckwalter2unicode.py new file mode 100755 index 00000000000..94fec3225dd --- /dev/null +++ b/egs/tunisian_msa/s5/local/buckwalter2unicode.py @@ -0,0 +1,453 @@ +#!/usr/bin/python + +# buckwalter2unicode.py - A script to convert transliterated Arabic +# (using the Buckwalter system) to Unicode. +# +# Version 0.2 - 15th September 2004 +# +# Andrew Roberts (andyr [at] comp (dot) leeds [dot] ac (dot) uk) +# +# Project homepage: http://www.comp.leeds.ac.uk/andyr/software/ +# +# Now, listen carefully... +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +import sys, getopt, codecs, os, re + +# Declare a dictionary with Buckwalter's ASCII symbols as the keys, and +# their unicode equivalents as values. + +buck2uni = {"'": u"\u0621", # hamza-on-the-line + "|": u"\u0622", # madda + ">": u"\u0623", # hamza-on-'alif + "&": u"\u0624", # hamza-on-waaw + "<": u"\u0625", # hamza-under-'alif + "}": u"\u0626", # hamza-on-yaa' + "A": u"\u0627", # bare 'alif + "b": u"\u0628", # baa' + "p": u"\u0629", # taa' marbuuTa + "t": u"\u062A", # taa' + "v": u"\u062B", # thaa' + "j": u"\u062C", # jiim + "H": u"\u062D", # Haa' + "x": u"\u062E", # khaa' + "d": u"\u062F", # daal + "*": u"\u0630", # dhaal + "r": u"\u0631", # raa' + "z": u"\u0632", # zaay + "s": u"\u0633", # siin + "$": u"\u0634", # shiin + "S": u"\u0635", # Saad + "D": u"\u0636", # Daad + "T": u"\u0637", # Taa' + "Z": u"\u0638", # Zaa' (DHaa') + "E": u"\u0639", # cayn + "g": u"\u063A", # ghayn + "_": u"\u0640", # taTwiil + "f": u"\u0641", # faa' + "q": u"\u0642", # qaaf + "k": u"\u0643", # kaaf + "l": u"\u0644", # laam + "m": u"\u0645", # miim + "n": u"\u0646", # nuun + "h": u"\u0647", # haa' + "w": u"\u0648", # waaw + "Y": u"\u0649", # 'alif maqSuura + "y": u"\u064A", # yaa' + "F": u"\u064B", # fatHatayn + "N": u"\u064C", # Dammatayn + "K": u"\u064D", # kasratayn + "a": u"\u064E", # fatHa + "u": u"\u064F", # Damma + "i": u"\u0650", # kasra + "~": u"\u0651", # shaddah + "o": u"\u0652", # sukuun + "`": u"\u0670", # dagger 'alif + "{": u"\u0671", # waSla +} + +# For a reverse transliteration (Unicode -> Buckwalter), a dictionary +# which is the reverse of the above buck2uni is essential. + +uni2buck = {} + +# Iterate through all the items in the buck2uni dict. +for (key, value) in buck2uni.iteritems(): + # The value from buck2uni becomes a key in uni2buck, and vice + # versa for the keys. 
+ uni2buck[value] = key + +# Declare some global variables... + + +inFilename = "" # Name of filename containing input. +outFilename = "" # Name of filename to send the output +inEnc = "" # The text encoding of the input file +outEnc = "" # The text encoding for the output file +ignoreChars = "" # If lines begin with these symbols, ignore. +columnRange = "" # Holds columns numbers to transliterate. +delimiter = "" # Holds user-defined column delimiter. +reverse = 0 # When equal to 1, perform reverse transliteration, i.e., + # Unicode -> Buckwalter. + +# A function to print to screen the usage details of this script. + +def usage(): + print "Usage:", sys.argv[0], "-i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR" + print " -r -e INPUT_ENCODING, -E OUTPUT ENCODING]" + print " ", sys.argv[0], "-l" + print " ", sys.argv[0], "-h" + print "" + print " -i INFILE, --input=INFILE:" + print " Path to text file to be transliterated to Unicode." + print " -o OUTFILE, --output=OUTFILE:" + print " Path of file to output the newly transliterated text." + print " -e ENC, --input-encoding=ENC:" + print " Specify the text encoding of the source file. Default: latin_1." + print " -E ENC, --output-encoding=ENC:" + print " Specify the text encoding of the target file. Default: utf_8." + print " -g CHARS, --ignore-lines=CHARS:" + print " Will not transliterate lines that start with any of the CHARS" + print " given. E.g., -g #; will not alter lines starting with # or ;." + print " (May need to be -g \#\; on some platforms. See README.txt.)" + print " -c RANGE, --columns=RANGE:" + print " If in columns, select columns to apply transliteration. Can be" + print " comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3." + print " -d CHAR, --delimiter=CHAR:" + print " Specify the delimiter that defines the column if using the -c" + print " option above. Default is ' ' (space)." + print " -r, --reverse:" + print " Reverses the transliteration, i.e., Arabic to Buckwalter." + print " When used, it will change the default input encoding to utf_8 and" + print " output encoding to latin_1" + print " -l, --list-encodings:" + print " Displays all supported file encodings." + print " -h, --help:" + print " Displays this page." + print "" + +# A function to print to screen all the available encodings supported by +# Python. 
+
+def displayEncodings():
+    print "Codec Aliases Languages"
+    print "ascii 646, us-ascii English"
+    print "cp037 IBM037, IBM039 English"
+    print "cp424 EBCDIC-CP-HE, IBM424 Hebrew"
+    print "cp437 437, IBM437 English"
+    print "cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe"
+    print "cp737 Greek"
+    print "cp775 IBM775 Baltic languages"
+    print "cp850 850, IBM850 Western Europe"
+    print "cp852 852, IBM852 Central and Eastern Europe"
+    print "cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
+    print "cp856 Hebrew"
+    print "cp857 857, IBM857 Turkish"
+    print "cp860 860, IBM860 Portuguese"
+    print "cp861 861, CP-IS, IBM861 Icelandic"
+    print "cp862 862, IBM862 Hebrew"
+    print "cp863 863, IBM863 Canadian"
+    print "cp864 IBM864 Arabic"
+    print "cp865 865, IBM865 Danish, Norwegian"
+    print "cp869 869, CP-GR, IBM869 Greek"
+    print "cp874 Thai"
+    print "cp875 Greek"
+    print "cp1006 Urdu"
+    print "cp1026 ibm1026 Turkish"
+    print "cp1140 ibm1140 Western Europe"
+    print "cp1250 windows-1250 Central and Eastern Europe"
+    print "cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
+    print "cp1252 windows-1252 Western Europe"
+    print "cp1253 windows-1253 Greek"
+    print "cp1254 windows-1254 Turkish"
+    print "cp1255 windows-1255 Hebrew"
+    print "cp1256 windows-1256 Arabic"
+    print "cp1257 windows-1257 Baltic languages"
+    print "cp1258 windows-1258 Vietnamese"
+    print "latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe"
+    print "iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe"
+    print "iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese"
+    print "iso8859_4 iso-8859-4, latin4, L4 Baltic languages"
+    print "iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
+    print "iso8859_6 iso-8859-6, arabic Arabic"
+    print "iso8859_7 iso-8859-7, greek, greek8 Greek"
+    print "iso8859_8 iso-8859-8, hebrew Hebrew"
+    print "iso8859_9 iso-8859-9, latin5, L5 Turkish"
+    print "iso8859_10 iso-8859-10, latin6, L6 Nordic languages"
+    print "iso8859_13 iso-8859-13 Baltic languages"
+    print "iso8859_14 iso-8859-14, latin8, L8 Celtic languages"
+    print "iso8859_15 iso-8859-15 Western Europe"
+    print "koi8_r Russian"
+    print "koi8_u Ukrainian"
+    print "mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian"
+    print "mac_greek macgreek Greek"
+    print "mac_iceland maciceland Icelandic"
+    print "mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe"
+    print "mac_roman macroman Western Europe"
+    print "mac_turkish macturkish Turkish"
+    print "utf_16 U16, utf16 all languages"
+    print "utf_16_be UTF-16BE all languages (BMP only)"
+    print "utf_16_le UTF-16LE all languages (BMP only)"
+    print "utf_7 U7 all languages"
+    print "utf_8 U8, UTF, utf8 all languages"
+
+def parseIgnoreString(string):
+
+    symbols = []
+
+    for char in string:
+        symbols.append(char)
+
+    return symbols
+
+# Begin parsing the command-line arguments...
+
+try:
+    (options, args) = getopt.getopt(sys.argv[1:], "i:o:e:E:g:c:d:rlh",
+        ["input=", "output=", "input-encoding=", "output-encoding=",
+        "ignore-lines=", "columns=", "delimiter=", "reverse", "list-encodings",
+        "help"])
+
+except getopt.GetoptError:
+    # print help information and exit:
+    usage()
+    sys.exit(1)
+
+# Loop over all arguments supplied by the user.
+for (x, y) in options:
+    if x in ("-h", "--help"):
+        usage()
+        sys.exit(0)
+
+    if x in ("-l", "--list-encodings"):
+        displayEncodings()
+        sys.exit(0)
+
+    if x in ("-i", "--input"): inFilename = y
+    if x in ("-o", "--output"): outFilename = y
+    if x in ("-e", "--input-encoding"): inEnc = y
+    if x in ("-E", "--output-encoding"): outEnc = y
+    if x in ("-r", "--reverse"): reverse = 1
+    if x in ("-g", "--ignore-lines"): ignoreChars = y
+    if x in ("-c", "--columns"): columnRange = y
+    if x in ("-d", "--delimiter"):
+        delimiter = y
+        # A tab typed on the command line arrives as the two characters
+        # "\\t", not as a real tab, from python's point of view.
+        # Therefore replace any such "tabs" with proper tabs before
+        # proceeding.
+        delimiter = delimiter.replace("\\t", "\t")
+        # Do some error checking
+        if len(delimiter) > 1:
+            print >>sys.stderr, "Delimiter should only be a single character. Using first character " + delimiter[0]
+            delimiter = delimiter[0]
+
+        if buck2uni.get(delimiter):
+            print >> sys.stderr, "Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set."
+            print >> sys.stderr, "This will obviously cause much confusion as a delimiter!"
+            print >> sys.stderr, "Please try again. Aborting..."
+            sys.exit(1)
+
+# If no delimiter was set, then set the default to " " (space)
+if not delimiter:
+    delimiter = " "
+
+# If the user didn't specify the encoding of the input file, then revert to
+# defaults. The defaults vary depending on the direction of
+# transliteration:
+#
+# Buckwalter -> Unicode, default = latin_1
+# Unicode -> Buckwalter, default = utf_8
+
+if not inEnc:
+    if reverse:
+        inEnc = "utf_8"
+    else:
+        inEnc = "latin_1"
+
+# Similarly, if the user didn't specify the encoding of the output file,
+# then revert to defaults. The defaults vary depending on the direction
+# of transliteration:
+#
+# Buckwalter -> Unicode, default = utf_8
+# Unicode -> Buckwalter, default = latin_1
+
+if not outEnc:
+    if reverse:
+        outEnc = "latin_1"
+    else:
+        outEnc = "utf_8"
+
+# Ok, let's get the files open!
+
+# Providing a file for output was specified...
+if outFilename:
+    try:
+        # Create a file object, set it to "write" mode using the
+        # specified output encoding.
+        outFile = codecs.open(outFilename, "w", outEnc)
+
+    except IOError, msg:
+        # A problem occurred when trying to open this file. Report to
+        # user...
+        print msg
+        sys.exit(1)
+
+# Script cannot work without somewhere to store the transliteration.
+# Exit.
+else:
+    print "Must specify a file to store the output! Aborting..."
+    sys.exit(1)
+
+# Providing a file for input was specified...
+if inFilename:
+    try:
+        # Create a file object, set it to "read" mode using the
+        # specified input encoding.
+        inFile = codecs.open(inFilename, "r", inEnc)
+
+    except IOError, msg:
+        # A problem occurred when trying to open this file. Report to
+        # user...
+        print msg
+        sys.exit(1)
+
+# This script requires a file to read from. Exit.
+else:
+    print "Must specify a file to use as input! Aborting..."
+    sys.exit(1)
+
+def getColsFromRange(cRange):
+
+    columns = []
+    hyphenSearch = re.compile(r'-')
+
+    rangeElements = cRange.split(",")
+
+    for i in rangeElements:
+        # If it contains a hyphen (e.g., 1-3)
+        if hyphenSearch.search(i):
+            [start, end] = i.split("-")
+            columns = columns + range(int(start)-1, int(end))
+        else:
+            columns.append(int(i)-1)
+
+    return columns
+
+# This function transliterates a given string. It checks the direction
+# of the transliteration and then uses the appropriate dictionary. A
+# transliterated string is returned.
A +# transliterated string is returned. + +def transliterate(inString, lineNumber): + out = "" + + if columnRange: + columns = getColsFromRange(columnRange) + + # Split the line on the delimiter + lineCols = inString.split(delimiter) + + # Iterate over each column. If it's one of the ones in the range + # specified, then transliterate, otherwise just output column + # unchanged. + + for i in range(len(lineCols)): + + # If first column, then don't prefix the delimiter + if i == 0: + if i in columns: + out = transliterateString(lineCols[i]) + else : + out = lineCols[i] + else : + if i in columns: + out = out + delimiter + transliterateString(lineCols[i]) + else : + out = out + delimiter + lineCols[i] + + else: + out = transliterateString(inString) + + + + return out + +def transliterateString(inString): + + out = "" + + # For normal Buckwalter -> Unicode transliteration.. + if not reverse: + + # Loop over each character in the string, inString. + for char in inString: + # Look up current char in the dictionary to get its + # respective value. If there is no match, e.g., chars like + # spaces, then just stick with the current char without any + # conversion. + out = out + buck2uni.get(char, char) + + # Same as above, just in the other direction. + else: + + for char in inString: + out = out + uni2buck.get(char, char) + + return out + +#while 1: +# line = inFile.readline().strip() +# line = line.decode(inEnc) +# if not line: +# break + + # process string +# outFile.write(transliterate(line) + os.linesep) + +# Read in the lines of the input file. +lines = inFile.readlines() + +currentLineNumber = 1 +# Loop over each line +for line in lines: + line = line.strip() + try: + # Transliterate the current line, and then write the output to + # file. + + if not ignoreChars: + outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep) + else: + if line[0] in parseIgnoreString(ignoreChars): + outFile.write(line + " " + os.linesep) + else: + outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep) + + currentLineNumber = currentLineNumber + 1 + + except UnicodeError, msg: + # A problem when writing occurred. Report to user... + print msg + sys.exit(1) + +# All done! Better close the files used before terminating... +inFile.close() +outFile.close() + +# ... and relax! :) diff --git a/egs/tunisian_msa/s5/local/buckwalter2utf8.pl b/egs/tunisian_msa/s5/local/buckwalter2utf8.pl new file mode 100755 index 00000000000..c952e554f86 --- /dev/null +++ b/egs/tunisian_msa/s5/local/buckwalter2utf8.pl @@ -0,0 +1,11 @@ +#!/usr/bin/env perl +# Input buckwalter encoded Arabic and print it out as utf-8 encoded Arabic. +use strict; +use warnings; +use Carp; + +use Encode::Arabic::Buckwalter; # imports just like 'use Encode' would, plus more + +while ( my $line = <>) { + print encode 'utf8', decode 'buckwalter', $line; +} diff --git a/egs/tunisian_msa/s5/local/chain/compare_wer.sh b/egs/tunisian_msa/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..c6a3a91ea69 --- /dev/null +++ b/egs/tunisian_msa/s5/local/chain/compare_wer.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... 
]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +test_sets=(devtest test) + +for t in ${test_sets[@]}; do + printf '# %%WER % 14s ' $t + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_$t/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_$t/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_$t/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
fi
+
+
+echo -n "# Final train prob "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-params "
+for x in $*; do
+  printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+done
+echo
diff --git a/egs/tunisian_msa/s5/local/chain/run_tdnn.sh b/egs/tunisian_msa/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..d3c4a4ef11f
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,292 @@
+#!/bin/bash
+
+# Uses a resnet-style factored TDNN-F model.
+
+# ./local/chain/compare_wer.sh exp/chain/tdnn1a_sp
+# System tdnn1a_sp
+# %WER devtest 39.25
+# %WER test 49.74
+# Final train prob -0.0473
+# Final valid prob -0.0538
+# Final train prob (xent) -1.0935
+# Final valid prob (xent) -1.0817
+# Num-params 3466448
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=10
+train_set=train
+test_sets="devtest test"
+gmm=tri3b
+nnet3_affix=
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+affix=1a # affix for the TDNN directory name
+tree_affix=
+train_stage=22
+get_egs_stage=-10
+decode_iter=
+
+num_leaves=3500
+
+# training options
+# training chunk-options
+chunk_width=140,100,160
+# we don't need extra left/right context for TDNN systems.
+dropout_schedule='0,0@0.20,0.3@0.50,0'
+common_egs_dir=
+xent_regularize=0.1
+
+# training options
+srand=0
+remove_egs=true
+reporting_email=
+
+#decode options
+test_online_decoding=true # if true, it will run the last decoding stage.
+
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+# The iVector-extraction and feature-dumping parts are the same as the
+# standard nnet3 setup; you can skip them by setting "--stage 11" if you
+# have already run those things.
+local/nnet3/run_ivector_common.sh --stage $stage \
+  --train-set $train_set \
+  --gmm $gmm \
+  --nnet3-affix "$nnet3_affix" || exit 1;
+
+gmm_dir=exp/$gmm
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+tree_dir=exp/chain${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix}
+lang=data/lang_chain
+lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_lats
+dir=exp/chain${nnet3_affix}/tdnn${affix}_sp
+train_data_dir=data/${train_set}_sp_hires
+lores_train_data_dir=data/${train_set}_sp
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
+
+for f in $gmm_dir/final.mdl $train_data_dir/feats.scp \
+    $train_ivector_dir/ivector_online.scp $lores_train_data_dir/feats.scp; do
+  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 10 ]; then
+  echo "$0: creating lang directory $lang with chain-type topology"
+  # Create a version of the lang/ directory with one state per phone in the
+  # topo file.
+  if [ -d $lang ]; then
+    if [ $lang/L.fst -nt data/lang/L.fst ]; then
+      echo "$0: $lang already exists, not overwriting it; continuing"
+    else
+      echo "$0: $lang already exists and seems to be older than data/lang; exiting."
+      exit 1;
+    fi
+  else
+    cp -r data/lang $lang
+    silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+    nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
+    # Use our special topology; note that later on we may have to tune it.
+    steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
+  fi
+fi
+
+if [ $stage -le 11 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 20 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 12 ]; then
+  # Build a tree using our new topology. We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those. The num-leaves is always somewhat less than the num-leaves from
+  # the GMM baseline.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh \
+    --cmd "$train_cmd" \
+    --frame-subsampling-factor 3 \
+    --context-opts "--context-width=2 --central-position=1" \
+    $num_leaves \
+    ${lores_train_data_dir} \
+    $lang $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 13 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  affine_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true"
+  tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66"
+  linear_opts="l2-regularize=0.03 orthonormal-constraint=-1.0"
+  prefinal_opts="l2-regularize=0.03"
+  output_opts="l2-regularize=0.015"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=768
+  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=1
+  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=1
+  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=1
+  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=0
+  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=768 bottleneck-dim=64 time-stride=3
+  linear-component name=prefinal-l dim=192 $linear_opts
+
+  ## adding the layers for chain branch
+  prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=768 small-dim=192
+  output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
+
+  # adding the layers for xent branch
+  prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=768 small-dim=192
+  output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 14 ]; then
+  steps/nnet3/chain/train.py \
+    --stage=$train_stage \
+    --cmd="$decode_cmd" \
+    --feat.online-ivector-dir=$train_ivector_dir \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize $xent_regularize \
+    --chain.leaky-hmm-coefficient=0.1 \
+    --chain.l2-regularize=0.0 \
+    --chain.apply-deriv-weights=false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --trainer.dropout-schedule $dropout_schedule \
+    --trainer.add-option="--optimization.memory-compression-level=2" \
+    --trainer.srand=$srand \
+    --trainer.max-param-change=2.0 \
+    --trainer.num-epochs=8 \
+    --trainer.frames-per-iter=3000000 \
+    --trainer.optimization.num-jobs-initial=2 \
+    --trainer.optimization.num-jobs-final=5 \
+    --trainer.optimization.initial-effective-lrate=0.001 \
+    --trainer.optimization.final-effective-lrate=0.0001 \
+    --trainer.num-chunk-per-minibatch=128,64 \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$common_egs_dir" \
+    --egs.opts="--frames-overlap-per-eg 0" \
+    --cleanup.remove-egs=$remove_egs \
+    --use-gpu=true \
+    --reporting.email="$reporting_email" \
+    --feat-dir=$train_data_dir \
+    --tree-dir=$tree_dir \
+    --lat-dir=$lat_dir \
+    --dir=$dir || exit 1;
+fi
+
+if [ $stage -le 15 ]; then
+  # Note: it's not important to give mkgraph.sh the lang directory with the
+  # matched topology (since it gets the topology file from the model).
+  utils/mkgraph.sh \
+    --self-loop-scale 1.0 \
+    data/lang_test \
+    $tree_dir \
+    $tree_dir/graph || exit 1;
+fi
+
+if [ $stage -le 16 ]; then
+  frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
+  rm $dir/.error 2>/dev/null || true
+
+  for data in $test_sets; do
+    (
+      nspk=$(wc -l <data/${data}_hires/spk2utt)
+      steps/nnet3/decode.sh \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --frames-per-chunk $frames_per_chunk \
+        --nj $nspk --cmd "$decode_cmd" \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \
+        $tree_dir/graph data/${data}_hires ${dir}/decode_${data} || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+if $test_online_decoding && [ $stage -le 17 ]; then
+  # note: if the features change (e.g. you add pitch features), you will
+  # have to change the options of the following command line.
+  steps/online/nnet3/prepare_online_decoding.sh \
+    --mfcc-config conf/mfcc_hires.conf \
+    $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online
+
+  rm $dir/.error 2>/dev/null || true
+
+  for data in $test_sets; do
+    (
+      nspk=$(wc -l <data/${data}_hires/spk2utt)
+      steps/online/nnet3/decode.sh \
+        --acwt 1.0 --post-decode-acwt 10.0 \
+        --nj $nspk --cmd "$decode_cmd" \
+        $tree_dir/graph data/${data}_hires ${dir}_online/decode_${data} || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
diff --git a/egs/tunisian_msa/s5/local/devtest_recordings_make_lists.pl b/egs/tunisian_msa/s5/local/devtest_recordings_make_lists.pl
new file mode 100755
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/devtest_recordings_make_lists.pl
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# devtest_recordings_make_lists.pl - make lists for devtest recordings
+
+use strict;
+use warnings;
+use Carp;
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+BEGIN {
+    @ARGV == 3 or croak "USAGE $0 <transcripts> <speaker> <language>
+example:
+$0 Tunisian_MSA/data/transcripts/devtest/recordings.tsv 6 tunisia
+";
+}
+
+my ($tr,$spk,$l) = @ARGV;
+
+open my $I, '<', $tr or croak "problems with $tr";
+
+my $tmp_dir = "data/local/tmp/$l/$spk";
+
+# input wav file list
+my $wav_list = "$tmp_dir/wav.txt";
+croak "$!" unless ( -f $wav_list );
+# output temporary wav.scp files
+my $wav_scp = "$tmp_dir/wav.scp";
+
+# output temporary utt2spk files
+my $u = "$tmp_dir/utt2spk";
+
+# output temporary text files
+my $t = "$tmp_dir/text";
+
+# initialize hash for prompts
+my %p = ();
+
+# store prompts in hash
+LINEA: while ( my $line = <$I> ) {
+    chomp $line;
+    my ($s,$sent) = split /\t/, $line, 2;
+    $p{$s} = $sent;
+}
+
+open my $W, '<', $wav_list or croak "problem with $wav_list $!";
+open my $O, '+>', $wav_scp or croak "problem with $wav_scp $!";
+open my $U, '+>', $u or croak "problem with $u $!";
+open my $T, '+>', $t or croak "problem with $t $!";
+
+LINE: while ( my $line = <$W> ) {
+    chomp $line;
+    next LINE if ($line =~ /answers/ );
+    next LINE unless ( $line =~ /Recordings/ );
+    my ($volume,$directories,$file) = File::Spec->splitpath( $line );
+    my @dirs = split /\//, $directories;
+    my $b = basename $line, ".wav";
+    my $s = $dirs[-1];
+    my $rid = $s . '_' . 'recording' . '_' . $b;
+    my $uid = $s . '_' . 'recording';
+    if ( exists $p{$b} ) {
+        print $T "$rid\t$p{$b}\n";
+    } elsif ( defined $s ) {
+        warn "problem\t$s";
+        next LINE;
+    } else {
+        croak "$line";
+    }
+
+    print $O "$rid sox $line -t wav - |\n";
+    print $U "$rid\t$uid\n";
+}
+close $T;
+close $O;
+close $U;
+close $W;
diff --git a/egs/tunisian_msa/s5/local/nnet3/run_ivector_common.sh b/egs/tunisian_msa/s5/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..e8ff9a150ea
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# This script is called from local/nnet3/run_tdnn.sh and
+# local/chain/run_tdnn.sh (and may eventually be called by more
+# scripts). It contains the common feature preparation and
+# iVector-related parts of the script. See those scripts for examples
+# of usage.
+
+stage=0
+train_set=train
+test_sets="devtest test"
+gmm=tri3b
+
+nnet3_affix=
+
+. ./cmd.sh
+. ./path.sh
+. utils/parse_options.sh
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+  if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # perturb data to get alignments + # nnet will be trained by high resolution data + # _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh \ + data/${train_set} \ + data/${train_set}_sp + echo "$0: making mfcc features for low-resolution speed-perturbed data" + steps/make_mfcc.sh \ + --cmd "$train_cmd" \ + --nj 10 \ + data/${train_set}_sp + steps/compute_cmvn_stats.sh \ + data/${train_set}_sp + utils/fix_data_dir.sh \ + data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh \ + --nj 20 \ + --cmd "$train_cmd" \ + data/${train_set}_sp \ + data/lang \ + $gmm_dir \ + $ali_dir +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh \ + data/$datadir \ + data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh \ + data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh \ + --nj 10 \ + --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" \ + data/${datadir}_hires + steps/compute_cmvn_stats.sh \ + data/${datadir}_hires + utils/fix_data_dir.sh \ + data/${datadir}_hires + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l $tmp_tunis/$s/wav.txt + + local/devtest_recordings_make_lists.pl \ + $data_dir/transcripts/devtest/recordings.tsv $s tunis + + mkdir -p data/devtest + + for x in wav.scp utt2spk text; do + cat $tmp_tunis/$s/$x | tr " " " " >> data/devtest/$x + done +done + +utils/utt2spk_to_spk2utt.pl data/devtest/utt2spk | sort > data/devtest/spk2utt + +utils/fix_data_dir.sh data/devtest + +# training data consists of 2 parts: answers and recordings (recited) +answers_transcripts=$data_dir/transcripts/train/answers.tsv +recordings_transcripts=$data_dir/transcripts/train/recordings.tsv + +# location of test data +cls_rec_tr=$libyan_src/cls/data/transcripts/recordings/cls_recordings.tsv +lfi_rec_tr=$libyan_src/lfi/data/transcripts/recordings/lfi_recordings.tsv +srj_rec_tr=$libyan_src/srj/data/transcripts/recordings/srj_recordings.tsv +mbt_rec_tr=$data_dir/transcripts/test/mbt/recordings/mbt_recordings.tsv + +# make acoustic model training lists +mkdir -p $tmp_tunis + +# get wav file names + +# for recited speech +# the data collection laptops had names like CTELLONE CTELLTWO ... 
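+# Under each machine directory, recited speech is stored in Recordings
+# and prompted answers in Answers; the greps below separate the two.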
+for machine in CTELLONE CTELLTWO CTELLTHREE CTELLFOUR CTELLFIVE; do
+  find $data_dir/speech/train/$machine -type f -name "*.wav" | grep Recordings \
+    >> $tmp_tunis/recordings_wav.txt
+done
+
+# get file names for Answers
+for machine in CTELLONE CTELLTWO CTELLTHREE CTELLFOUR CTELLFIVE; do
+  find $data_dir/speech/train/$machine -type f \
+    -name "*.wav" \
+    | grep Answers >> $tmp_tunis/answers_wav.txt
+done
+
+# make separate transcription lists for answers and recordings
+export LC_ALL=en_US.UTF-8
+local/answers_make_lists.pl $answers_transcripts
+
+utils/fix_data_dir.sh $tmp_tunis/answers
+
+local/recordings_make_lists.pl $recordings_transcripts
+
+utils/fix_data_dir.sh $tmp_tunis/recordings
+
+# consolidate lists
+# acoustic models will be trained on both recited and prompted speech
+mkdir -p $tmp_tunis/lists
+
+for x in wav.scp utt2spk text; do
+  cat $tmp_tunis/answers/$x $tmp_tunis/recordings/$x > $tmp_tunis/lists/$x
+done
+
+utils/fix_data_dir.sh $tmp_tunis/lists
+
+# get training lists
+mkdir -p data/train
+for x in wav.scp utt2spk text; do
+  sort $tmp_tunis/lists/$x | tr " " " " > data/train/$x
+done
+
+utils/utt2spk_to_spk2utt.pl data/train/utt2spk | sort > data/train/spk2utt
+
+utils/fix_data_dir.sh data/train
+
+# process the Libyan MSA data
+mkdir -p $tmp_libyan
+
+for s in cls lfi srj; do
+  mkdir -p $tmp_libyan/$s
+
+  # get list of wav files
+  find $libyan_src/$s -type f \
+    -name "*.wav" \
+    | grep recordings > $tmp_libyan/$s/recordings_wav.txt
+
+  echo "$0: making recordings list for $s"
+  local/test_recordings_make_lists.pl \
+    $libyan_src/$s/data/transcripts/recordings/${s}_recordings.tsv $s libyan
+done
+
+# process the Tunisian MSA test data
+
+mkdir -p $tmp_tunis/mbt
+
+# get list of wav files
+find $data_dir/speech/test/mbt -type f \
+  -name "*.wav" \
+  | grep recordings > $tmp_tunis/mbt/recordings_wav.txt
+
+echo "$0: making recordings list for mbt"
+local/test_recordings_make_lists.pl \
+  $data_dir/transcripts/test/mbt/recordings/mbt_recordings.tsv mbt tunis
+
+mkdir -p data/test
+# get the Libyan files
+for s in cls lfi srj; do
+  for x in wav.scp utt2spk text; do
+    cat $tmp_libyan/$s/recordings/$x | tr " " " " >> data/test/$x
+  done
+done
+
+for x in wav.scp utt2spk text; do
+  cat $tmp_tunis/mbt/recordings/$x | tr " " " " >> data/test/$x
+done
+
+utils/utt2spk_to_spk2utt.pl data/test/utt2spk | sort > data/test/spk2utt
+
+utils/fix_data_dir.sh data/test
diff --git a/egs/tunisian_msa/s5/local/prepare_dict.sh b/egs/tunisian_msa/s5/local/prepare_dict.sh
new file mode 100755
index 00000000000..f7d1ac3a619
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/prepare_dict.sh
@@ -0,0 +1,43 @@
+#!/bin/bash -u
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+set -o errexit
+
+[ -f ./path.sh ] && . ./path.sh
+
+if [ ! -d data/local/dict ]; then
+  mkdir -p data/local/dict
+fi
+
+l=$1
+export LC_ALL=C
+
+cut -f2- -d " " $l | tr -s '[:space:]' '[\n*]' | grep -v SPN | \
+  sort -u | tail -n+2 > data/local/dict/nonsilence_phones.txt
+
+expand -t 1 $l | sort -u | \
+  sed "1d" > data/local/dict/lexicon.txt
+
+echo "<UNK> SPN" >> data/local/dict/lexicon.txt
+
+# silence phones, one per line.
+{
+  echo SIL;
+  echo SPN;
+} \
+  > \
+  data/local/dict/silence_phones.txt
+
+echo SIL > data/local/dict/optional_silence.txt
+
+# get the phone list from the lexicon file
+(
+  tr '\n' ' ' < data/local/dict/silence_phones.txt;
+  echo;
+  tr '\n' ' ' < data/local/dict/nonsilence_phones.txt;
+  echo;
+) >data/local/dict/extra_questions.txt
+
+echo "$0: Finished dictionary preparation."
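+# data/local/dict now has the layout utils/prepare_lang.sh expects:
+#   lexicon.txt            word followed by its pronunciation(s)
+#   nonsilence_phones.txt  one phone per line
+#   silence_phones.txt     SIL and SPN
+#   optional_silence.txt   SIL
+#   extra_questions.txt    the silence and nonsilence phone sets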
diff --git a/egs/tunisian_msa/s5/local/prepare_lm.sh b/egs/tunisian_msa/s5/local/prepare_lm.sh
new file mode 100755
index 00000000000..4fc50b84d11
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/prepare_lm.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+. ./cmd.sh
+set -e
+. ./path.sh
+. $KALDI_ROOT/tools/env.sh
+stage=0
+nsegs=1000000; # limit the number of training segments
+
+. ./utils/parse_options.sh
+
+if [ ! -d data/local/lm ]; then
+  mkdir -p data/local/lm
+fi
+
+corpus=$1
+
+if [ ! -f $corpus ]; then
+  echo "$0: input data $corpus not found."
+  exit 1
+fi
+
+perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' < $corpus | \
+  head -n $nsegs > data/local/lm/train.txt
+
+if ! command -v ngram-count >/dev/null; then
+  if uname -a | grep darwin >/dev/null; then # For MACOSX...
+    sdir=$KALDI_ROOT/tools/srilm/bin/macosx
+  elif uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+    sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64
+  else
+    sdir=$KALDI_ROOT/tools/srilm/bin/i686
+  fi
+  if [ -f $sdir/ngram-count ]; then
+    echo Using SRILM tools from $sdir
+    export PATH=$PATH:$sdir
+  else
+    echo You appear to not have SRILM tools installed, either on your path,
+    echo or installed in $sdir. See tools/install_srilm.sh for installation
+    echo instructions.
+    exit 1
+  fi
+fi
+
+
+ngram-count -order 3 -interpolate -unk -map-unk "<UNK>" \
+  -limit-vocab -text data/local/lm/train.txt -lm data/local/lm/trigram.arpa || exit 1;
+
+gzip -f data/local/lm/trigram.arpa
diff --git a/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.pl b/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.pl
new file mode 100755
index 00000000000..9074d4807c2
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.pl
@@ -0,0 +1,21 @@
+#!/usr/bin/env perl
+# qcri_buckwalter2utf8.pl - convert the qcri dictionary to utf8
+
+use strict;
+use warnings;
+use Carp;
+
+use Encode::Arabic::Buckwalter; # imports just like 'use Encode' would, plus more
+
+my $bw_dict = "qcri.txt";
+
+open my $B, '<', $bw_dict or croak "Problem with $bw_dict $!";
+
+LINE: while ( my $line = <$B> ) {
+    chomp $line;
+    next LINE if ( $line =~ /^\#/ );
+    my ($w,$p) = split / /, $line, 2;
+    print encode 'utf8', decode 'buckwalter', $w;
+    print " $p\n";
+}
+
diff --git a/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.sh b/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.sh
new file mode 100755
index 00000000000..b8433967e14
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/qcri_buckwalter2utf8.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# write separate files for word and pronunciation fields
+cut -d " " -f 1 qcri.txt > qcri_words_buckwalter.txt
+cut -d " " -f 2- qcri.txt > qcri_prons.txt
+
+# convert words to utf8
+local/buckwalter2unicode.py -i qcri_words_buckwalter.txt -o qcri_words_utf8.txt
+
+paste qcri_words_utf8.txt qcri_prons.txt
+
+rm qcri_words_buckwalter.txt qcri_words_utf8.txt qcri_prons.txt
diff --git a/egs/tunisian_msa/s5/local/qcri_lexicon_download.sh b/egs/tunisian_msa/s5/local/qcri_lexicon_download.sh
new file mode 100755
index 00000000000..29a9ca1eed6
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/qcri_lexicon_download.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# configuration variables
+lex=$1
+tmpdir=data/local/tmp
+# where to put the downloaded lexicon
+downloaddir=$(pwd)
+# Where to put the uncompressed file
+datadir=$(pwd)
+# end of configuration variable settings
+
+# download the lexicon
+if [ ! 
-f $downloaddir/qcri.txt.bz2 ]; then + wget -O $downloaddir/qcri.txt.bz2 $lex + ( + cd $downloaddir + bzcat qcri.txt.bz2 | tail -n+4 > $datadir/qcri.txt + ) +else + echo "$0: The corpus $lex was already downloaded." +fi diff --git a/egs/tunisian_msa/s5/local/recordings_make_lists.pl b/egs/tunisian_msa/s5/local/recordings_make_lists.pl new file mode 100755 index 00000000000..41fc15e0dd3 --- /dev/null +++ b/egs/tunisian_msa/s5/local/recordings_make_lists.pl @@ -0,0 +1,72 @@ +#!/usr/bin/env perl + +# Copyright 2018 John Morgan +# Apache 2.0. + +# recordings_make_lists.pl - make acoustic model training lists + +use strict; +use warnings; +use Carp; + +use File::Spec; +use File::Copy; +use File::Basename; + +my $tmpdir = "data/local/tmp/tunis"; + +system "mkdir -p $tmpdir/recordings"; + +# input wav file list +my $w = "$tmpdir/recordings_wav.txt"; + +# output temporary wav.scp files +my $o = "$tmpdir/recordings/wav.scp"; + +# output temporary utt2spk files +my $u = "$tmpdir/recordings/utt2spk"; + +# output temporary text files +my $t = "$tmpdir/recordings/text"; + +# initialize hash for prompts +my %p = (); + +# store prompts in hash +LINEA: while ( my $line = <> ) { + chomp $line; + my ($s,$sent) = split /\t/, $line, 2; + $p{$s} = $sent; +} + +open my $W, '<', $w or croak "problem with $w $!"; +open my $O, '+>', $o or croak "problem with $o $!"; +open my $U, '+>', $u or croak "problem with $u $!"; +open my $T, '+>', $t or croak "problem with $t $!"; + + LINE: while ( my $line = <$W> ) { + chomp $line; + next LINE if ($line =~ /Answers/ ); + next LINE unless ( $line =~ /Recordings/ ); + my ($volume,$directories,$file) = File::Spec->splitpath( $line ); + my @dirs = split /\//, $directories; + my $machine = $dirs[-3]; + my $r = basename $line, ".wav"; + my $s = $dirs[-1]; + my $rid = $machine . '_' . $s . '_r_' . $r; + if ( exists $p{$r} ) { + print $T "$rid\t$p{$r}\n"; + } elsif ( defined $rid ) { + warn "problem\t$rid"; + next LINE; + } else { + croak "$line"; + } + + print $O "$rid sox $line -t wav - |\n"; + print $U "$rid\t${machine}_${s}_r\n"; +} +close $T; +close $O; +close $U; +close $W; diff --git a/egs/tunisian_msa/s5/local/score.sh b/egs/tunisian_msa/s5/local/score.sh new file mode 120000 index 00000000000..0afefc3158c --- /dev/null +++ b/egs/tunisian_msa/s5/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/tunisian_msa/s5/local/subs_download.sh b/egs/tunisian_msa/s5/local/subs_download.sh new file mode 100755 index 00000000000..7e46fd255aa --- /dev/null +++ b/egs/tunisian_msa/s5/local/subs_download.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright 2018 John Morgan +# Apache 2.0. + +# Begin configuration +subs_src=$1 +tmpdir=data/local/tmp +download_dir=$(pwd) +datadir=$(pwd) +# End configuration + +# download the subs corpus +if [ ! -f $download_dir/subs.txt.gz ]; then + wget -O $download_dir/subs.txt.gz $subs_src +else + echo "$0: The corpus $subs_src was already downloaded." +fi + +if [ ! -f $datadir/subs.txt ]; then + ( + cd $datadir + zcat < ./subs.txt.gz > subs.txt + ) + else + echo "$0: subs file already extracted." +fi diff --git a/egs/tunisian_msa/s5/local/subs_prepare_data.pl b/egs/tunisian_msa/s5/local/subs_prepare_data.pl new file mode 100755 index 00000000000..e39f77a25cb --- /dev/null +++ b/egs/tunisian_msa/s5/local/subs_prepare_data.pl @@ -0,0 +1,115 @@ +#!/usr/bin/env perl + +# Copyright 2018 John Morgan +# Apache 2.0. 
+
+# subs_prepare_data.pl - condition subs data for lm training
+
+use strict;
+use warnings;
+use Carp;
+
+use Encode;
+
+# set lower and upper bounds
+my $low_bound = 8;
+# only segments with at least $low_bound words will be written
+my $up_bound = 16;
+# only segments with fewer than $up_bound words will be written
+
+# input and output files
+my $corp = "subs.txt";
+my $symtab = "data/lang/words.txt";
+my $conditioned = "data/local/tmp/subs/lm/ar.txt";
+my $oo = "data/local/tmp/subs/lm/oovs.txt";
+my $iv = "data/local/tmp/subs/lm/in_vocabulary.txt";
+
+open my $CORP, '<', $corp or croak "problems with $corp $!";
+system "mkdir -p data/local/tmp/subs/lm";
+open my $COND, '+>:utf8', $conditioned or croak "problems with $conditioned $!";
+
+if ( -s $conditioned ) {
+    croak "$conditioned already exists.";
+} else {
+    LINE: while ( my $line = <$CORP> ) {
+        $line = decode_utf8 $line;
+        chomp $line;
+
+        my @tokens = split /\s+/, $line;
+
+        next LINE if ( ($#tokens < $low_bound) or ($#tokens > $up_bound ));
+
+        # remove punctuation
+        $line =~ s/(\p{Punctuation}+|\p{Dash_Punctuation}+|\p{Close_Punctuation}+|\p{Open_Punctuation}+|\p{Initial_Punctuation}+|\p{Final_Punctuation}+|\p{Connector_Punctuation}+|\p{Other_Punctuation}+|[ ]+)/ /msxg;
+        # convert tabs to white space
+        $line =~ s/\t/ /g;
+        # hard (non-breaking) space to soft space
+        $line =~ s/\x{00A0}/ /g;
+        # squeeze white space
+        $line =~ s/\s+/ /g;
+        # initial and final white space
+        $line =~ s/^\p{Separator}+//;
+        $line =~ s/\p{Separator}+$//;
+        # down case
+        $line = lc $line;
+
+        print $COND "$line\n";
+    }
+}
+close $CORP;
+close $COND;
+
+# find out of vocabulary words
+# $symtab points to a file containing a map of symbols to integers
+
+# hash for word to integer map
+my %sym2int = ();
+
+open my $F, '<', $symtab or croak "problem with $symtab $!";
+
+# store words to int map in hash
+while( my $line = <$F>) {
+    chomp $line;
+    my ($s,$i) = split /\s/, $line, 2;
+    $sym2int{$s} = $i;
+}
+close $F;
+
+open my $I, '<', $conditioned or croak "problem with $conditioned $!";
+open my $OO, '+>', $oo or croak "problems with $oo $!";
+
+while ( my $line = <$I>) {
+    chomp $line;
+    my @A = split /\s/, $line;
+    foreach my $a (@A) {
+        if (!defined ($sym2int{$a})) {
+            print $OO "$a\n";
+        }
+    }
+}
+close $OO;
+close $I;
+
+# remove segments with OOVs
+
+# store OOVS in hash
+my %oov = ();
+open my $V, '<', $oo or croak "problems with $oo $!";
+while ( my $line = <$V> ) {
+    chomp $line;
+    $oov{$line} = 1;
+}
+close $V;
+
+open my $L, '<', $conditioned or croak "problems with $conditioned $!";
+open my $IV, '+>', $iv or croak "problems with $iv $!";
+
+SEGMENT: while ( my $segment = <$L> ) {
+    chomp $segment;
+    my @words = split /\s+/, $segment;
+    foreach my $word ( sort @words ) {
+        next SEGMENT if ( $oov{$word} );
+    }
+    print $IV "$segment\n";
+}
+close $IV;
+close $L;
diff --git a/egs/tunisian_msa/s5/local/tamsa_download.sh b/egs/tunisian_msa/s5/local/tamsa_download.sh
new file mode 100755
index 00000000000..5e4666482ab
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/tamsa_download.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+speech=$1
+
+# where to put the downloaded speech corpus
+download_dir=$(pwd)
+data_dir=$download_dir/Tunisian_MSA/data
+
+# download the corpus from openslr
+if [ ! -f $download_dir/tamsa.tar.gz ]; then
+  wget -O $download_dir/tamsa.tar.gz $speech
+else
+  echo "$0: The corpus $speech was already downloaded."
+fi
+
+if [ ! -d $download_dir/Tunisian_MSA ]; then
+  (
+    cd $download_dir
+    tar -xzf tamsa.tar.gz
+  )
+else
+  echo "$0: The corpus was already unzipped."
+fi
diff --git a/egs/tunisian_msa/s5/local/test_answers_make_lists.pl b/egs/tunisian_msa/s5/local/test_answers_make_lists.pl
new file mode 100755
index 00000000000..aa7d0e314f3
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/test_answers_make_lists.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# test_answers_make_lists.pl - make acoustic model test lists
+
+use strict;
+use warnings;
+use Carp;
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+BEGIN {
+    @ARGV == 3 or croak "USAGE $0 <transcripts> <speaker> <language>
+example:
+$0 /home/zak76/Desktop/Kaldi/kaldi-master/tunisian_msa-master/Libyan_collected_test/TEST/Libyan_MSA/adel/data/transcripts/answers/adel_answers.tsv adel libyan
+";
+}
+
+my ($tr,$spk,$l) = @ARGV;
+
+open my $I, '<', $tr or croak "problems with $tr";
+
+my $tmp_dir = "data/local/tmp/$l/$spk";
+
+system "mkdir -p $tmp_dir/answers";
+
+# input wav file list
+my $w = "$tmp_dir/answers_wav.txt";
+
+# output temporary wav.scp files
+my $o = "$tmp_dir/answers/wav.scp";
+
+# output temporary utt2spk files
+my $u = "$tmp_dir/answers/utt2spk";
+
+# output temporary text files
+my $t = "$tmp_dir/answers/text";
+
+# initialize hash for prompts
+my %p = ();
+
+# store prompts in hash
+LINEA: while ( my $line = <$I> ) {
+    chomp $line;
+    my ($s,$sent) = split /\t/, $line, 2;
+    $p{$s} = $sent;
+}
+
+open my $W, '<', $w or croak "problem with $w $!";
+open my $O, '+>', $o or croak "problem with $o $!";
+open my $U, '+>', $u or croak "problem with $u $!";
+open my $T, '+>', $t or croak "problem with $t $!";
+
+LINE: while ( my $line = <$W> ) {
+    chomp $line;
+    next LINE if ($line =~ /recordings/ );
+    next LINE unless ( $line =~ /answers/ );
+    my ($volume,$directories,$file) = File::Spec->splitpath( $line );
+    my @dirs = split /\//, $directories;
+    my $b = basename $line, ".wav";
+    my ($sk,$r) = split /\_/, $b, 2;
+    my $s = $dirs[-1];
+    my $rid = $sk . '_' . $r;
+    if ( exists $p{$b} ) {
+        print $T "$rid\t$p{$b}\n";
+    } elsif ( defined $rid ) {
+        warn "problem\t$rid";
+        next LINE;
+    } else {
+        croak "$line";
+    }
+
+    print $O "$rid sox $line -t wav - |\n";
+    print $U "$rid\t${sk}_a\n";
+}
+close $T;
+close $O;
+close $U;
+close $W;
diff --git a/egs/tunisian_msa/s5/local/test_recordings_make_lists.pl b/egs/tunisian_msa/s5/local/test_recordings_make_lists.pl
new file mode 100755
index 00000000000..0b1323f2738
--- /dev/null
+++ b/egs/tunisian_msa/s5/local/test_recordings_make_lists.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/env perl
+
+# Copyright 2018 John Morgan
+# Apache 2.0.
+
+# test_recordings_make_lists.pl - make acoustic model test lists
+
+use strict;
+use warnings;
+use Carp;
+
+use File::Spec;
+use File::Copy;
+use File::Basename;
+
+BEGIN {
+    @ARGV == 3 or croak "USAGE $0 <transcripts> <speaker> <language>
+example:
+$0 /mnt/disk01/Libyan_MSA/srj/data/transcripts/recordings/srj_recordings.tsv srj libyan
+";
+}
+
+my ($tr,$spk,$l) = @ARGV;
+
+open my $I, '<', $tr or croak "problems with $tr";
+
+my $tmp_dir = "data/local/tmp/$l/$spk";
+
+system "mkdir -p $tmp_dir/recordings";
+
+# input wav file list
+my $w = "$tmp_dir/recordings_wav.txt";
+
+# output temporary wav.scp files
+my $o = "$tmp_dir/recordings/wav.scp";
+
+# output temporary utt2spk files
+my $u = "$tmp_dir/recordings/utt2spk";
+
+# output temporary text files
+my $t = "$tmp_dir/recordings/text";
+
+# initialize hash for prompts
+my %p = ();
+
+# store prompts in hash
+LINEA: while ( my $line = <$I> ) {
+    chomp $line;
+    my ($s,$sent) = split /\t/, $line, 2;
+    $p{$s} = $sent;
+}
+
+open my $W, '<', $w or croak "problem with $w $!";
+open my $O, '+>', $o or croak "problem with $o $!";
+open my $U, '+>', $u or croak "problem with $u $!";
+open my $T, '+>', $t or croak "problem with $t $!";
+
+LINE: while ( my $line = <$W> ) {
+    chomp $line;
+    next LINE if ($line =~ /answers/ );
+    next LINE unless ( $line =~ /recordings/ );
+    my ($volume,$directories,$file) = File::Spec->splitpath( $line );
+    my @dirs = split /\//, $directories;
+    my $b = basename $line, ".wav";
+    my ($sk,$r) = split /\_/, $b, 2;
+    my $s = $dirs[-1];
+    my $rid = $sk . '_' . $r;
+    if ( exists $p{$b} ) {
+        print $T "$rid\t$p{$b}\n";
+    } elsif ( defined $rid ) {
+        warn "problem\t$rid";
+        next LINE;
+    } else {
+        croak "$line";
+    }
+
+    print $O "$rid sox $line -t wav - |\n";
+    print $U "$rid\t${sk}_r\n";
+}
+close $T;
+close $O;
+close $U;
+close $W;
diff --git a/egs/tunisian_msa/s5/path.sh b/egs/tunisian_msa/s5/path.sh
new file mode 100644
index 00000000000..705600ad47a
--- /dev/null
+++ b/egs/tunisian_msa/s5/path.sh
@@ -0,0 +1,8 @@
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
+
+# For now, don't include any of the optional dependencies of the main
+# librispeech recipe
diff --git a/egs/tunisian_msa/s5/run.sh b/egs/tunisian_msa/s5/run.sh
new file mode 100755
index 00000000000..107acdf271c
--- /dev/null
+++ b/egs/tunisian_msa/s5/run.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# Trains on 11 hours of speech from CTELL{ONE,TWO,THREE,FOUR,FIVE}.
+# Uses the QCRI vowelized Arabic lexicon.
+# Converts the Buckwalter encoding to utf8.
+. ./cmd.sh
+. ./path.sh
+stage=0
+
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+# Do not change tmpdir, other scripts under local depend on it
+tmpdir=data/local/tmp
+
+# The speech corpus is on openslr.org
+speech="http://www.openslr.org/resources/46/Tunisian_MSA.tar.gz"
+
+# We use the QCRI lexicon.
+lex="http://alt.qcri.org/resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2"
+
+# We train the lm on subtitles.
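+# The source is the Arabic monolingual OpenSubtitles2018 corpus from OPUS.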
+subs_src="http://opus.nlpl.eu/download.php?f=OpenSubtitles2018/mono/OpenSubtitles2018.ar.gz" + +if [ $stage -le 1 ]; then + # Downloads archive to this script's directory + local/tamsa_download.sh $speech + + local/qcri_lexicon_download.sh $lex + + local/subs_download.sh $subs_src +fi + +# preparation stages will store files under data/ +# Delete the entire data directory when restarting. +if [ $stage -le 2 ]; then + local/prepare_data.sh +fi + +if [ $stage -le 3 ]; then + mkdir -p $tmpdir/dict + local/qcri_buckwalter2utf8.sh > $tmpdir/dict/qcri_utf8.txt +fi + +if [ $stage -le 4 ]; then + local/prepare_dict.sh $tmpdir/dict/qcri_utf8.txt +fi + +if [ $stage -le 5 ]; then + # prepare the lang directory + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang +fi + +if [ $stage -le 6 ]; then + echo "Preparing the subs data for lm training." + local/subs_prepare_data.pl +fi + +if [ $stage -le 7 ]; then + echo "lm training." + local/prepare_lm.sh $tmpdir/subs/lm/in_vocabulary.txt +fi + +if [ $stage -le 8 ]; then + echo "Making grammar fst." + utils/format_lm.sh \ + data/lang data/local/lm/trigram.arpa.gz data/local/dict/lexicon.txt \ + data/lang_test +fi + +if [ $stage -le 9 ]; then + # extract acoustic features + for fld in devtest train test; do + steps/make_mfcc.sh data/$fld exp/make_mfcc/$fld mfcc + utils/fix_data_dir.sh data/$fld + steps/compute_cmvn_stats.sh data/$fld exp/make_mfcc mfcc + utils/fix_data_dir.sh data/$fld + done +fi + +if [ $stage -le 10 ]; then + echo "$0: monophone training" + steps/train_mono.sh data/train data/lang exp/mono +fi + +if [ $stage -le 11 ]; then + # monophone evaluation + ( + # make decoding graph for monophones + utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph + + # test monophones + for x in devtest test; do + nspk=$(wc -l < data/$x/spk2utt) + steps/decode.sh --nj $nspk exp/mono/graph data/$x exp/mono/decode_${x} + done + ) & +fi + +if [ $stage -le 12 ]; then + # align with monophones + steps/align_si.sh data/train data/lang exp/mono exp/mono_ali +fi + +if [ $stage -le 13 ]; then + echo "$0: Starting triphone training in exp/tri1" + steps/train_deltas.sh \ + --boost-silence 1.25 1000 6000 data/train data/lang exp/mono_ali exp/tri1 +fi + +wait + +if [ $stage -le 14 ]; then + # test cd gmm hmm models + # make decoding graphs for tri1 + ( + utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph + + # decode test data with tri1 models + for x in devtest test; do + nspk=$(wc -l < data/$x/spk2utt) + steps/decode.sh --nj $nspk exp/tri1/graph data/$x exp/tri1/decode_${x} + done + ) & +fi + +if [ $stage -le 15 ]; then + # align with triphones + steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali +fi + +if [ $stage -le 16 ]; then + echo "$0: Starting (lda_mllt) triphone training in exp/tri2b" + steps/train_lda_mllt.sh \ + --splice-opts "--left-context=3 --right-context=3" 500 5000 \ + data/train data/lang exp/tri1_ali exp/tri2b +fi + +wait + +if [ $stage -le 17 ]; then + ( + # make decoding FSTs for tri2b models + utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph + + # decode test with tri2b models + for x in devtest test; do + nspk=$(wc -l < data/$x/spk2utt) + steps/decode.sh --nj $nspk exp/tri2b/graph data/$x exp/tri2b/decode_${x} + done + ) & +fi + +if [ $stage -le 18 ]; then + # align with lda and mllt adapted triphones + steps/align_si.sh \ + --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali +fi + +if [ $stage -le 19 ]; then + echo "$0: Starting (SAT) triphone training in exp/tri3b" + steps/train_sat.sh 800 
+
+if [ $stage -le 19 ]; then
+  echo "$0: Starting (SAT) triphone training in exp/tri3b"
+  steps/train_sat.sh --cmd "$train_cmd" \
+    800 8000 data/train data/lang exp/tri2b_ali exp/tri3b
+fi
+
+if [ $stage -le 20 ]; then
+  (
+    # make decoding graphs for SAT models
+    utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph
+
+    # decode test sets with tri3b models
+    for x in devtest test; do
+      nspk=$(wc -l < data/$x/spk2utt)
+      steps/decode_fmllr.sh --cmd "$decode_cmd" --nj $nspk exp/tri3b/graph data/$x exp/tri3b/decode_${x}
+    done
+  ) &
+fi
+
+if [ $stage -le 21 ]; then
+  # align with tri3b models
+  echo "$0: Starting exp/tri3b_ali"
+  steps/align_fmllr.sh --cmd "$train_cmd" data/train data/lang exp/tri3b exp/tri3b_ali
+fi
+
+if [ $stage -le 22 ]; then
+  # train and test chain models
+  local/chain/run_tdnn.sh
+fi
+
+# Wait for any remaining background decoding jobs before exiting.
+wait
diff --git a/egs/tunisian_msa/s5/steps b/egs/tunisian_msa/s5/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/tunisian_msa/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/tunisian_msa/s5/utils b/egs/tunisian_msa/s5/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/tunisian_msa/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
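To dispatch this recipe on a PBS/Torque cluster, cmd.sh can point at the pbs.pl wrapper patched below. A minimal sketch, assuming conf/pbs.conf maps the --mem option in the queue.pl-style "option" syntax (the qsub flags are site-specific assumptions; only the leading "command" line is required by the wrapper's config parser):

    export train_cmd="utils/parallel/pbs.pl --mem 2G"
    export decode_cmd="utils/parallel/pbs.pl --mem 4G"
    export mkgraph_cmd="utils/parallel/pbs.pl --mem 8G"

    # conf/pbs.conf (sketch)
    command qsub -V -S /bin/bash
    option mem=* -l mem=$0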
diff --git a/egs/wsj/s5/utils/parallel/pbs.pl b/egs/wsj/s5/utils/parallel/pbs.pl
index cbde8eb86d5..35a33ba2dca 100755
--- a/egs/wsj/s5/utils/parallel/pbs.pl
+++ b/egs/wsj/s5/utils/parallel/pbs.pl
@@ -18,12 +18,10 @@
 # names and the commands configurable, as similar problems can be expected
 # with Torque, Univa... and who knows what else
 #
-# queue.pl has the same functionality as run.pl, except that
-# it runs the job in question on the queue (Sun GridEngine).
-# This version of queue.pl uses the task array functionality
-# of the grid engine.  Note: it's different from the queue.pl
-# in the s4 and earlier scripts.
-
+# pbs.pl has the same functionality as run.pl, except that
+# it runs the job in question on the queue (PBS).
+# This version of pbs.pl uses the task array functionality
+# of PBS.
 # The script now supports configuring the queue system using a config file
 # (default in conf/pbs.conf; but can be passed specified with --config option)
 # and a set of command line options.
@@ -78,12 +76,12 @@ sub print_usage() {
   print STDERR
-   "Usage: pbs.pl [options] [JOB=1:n] log-file command-line arguments...\n" .
-   "e.g.: queue.pl foo.log echo baz\n" .
+   "Usage: pbs.pl [options] [JOB=1:n] log-file command-line arguments...\n" .
+   "e.g.: pbs.pl foo.log echo baz\n" .
    " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" .
-   "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
+   "or: pbs.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
    " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" .
-   "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
+   "or: pbs.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
    " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" .
    " another string other than JOB)\n" .
    "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" .
@@ -113,7 +111,7 @@ ()
   } else {
     my $argument = shift @ARGV;
     if ($argument =~ m/^--/) {
-      print STDERR "queue.pl: Warning: suspicious argument '$argument' to $switch; starts with '-'\n";
+      print STDERR "pbs.pl: Warning: suspicious argument '$argument' to $switch; starts with '-'\n";
     }
     if ($switch eq "-sync" && $argument =~ m/^[yY]/) {
       $sync = 1;
@@ -141,7 +139,7 @@ ()
     $jobend = $3;
     shift;
     if ($jobstart > $jobend) {
-      die "queue.pl: invalid job range $ARGV[0]";
+      die "pbs.pl: invalid job range $ARGV[0]";
     }
     if ($jobstart <= 0) {
       die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is a GridEngine limitation).";
@@ -153,7 +151,7 @@ ()
     $jobend = $2;
     shift;
   } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
-    print STDERR "queue.pl: Warning: suspicious first argument to queue.pl: $ARGV[0]\n";
+    print STDERR "pbs.pl: Warning: suspicious first argument to pbs.pl: $ARGV[0]\n";
   }
 }
@@ -248,7 +246,7 @@ ()
       $cli_options{$option} = $value;
     }
   } else {
-    print STDERR "queue.pl: unable to parse line '$line' in config file ($config)\n";
+    print STDERR "pbs.pl: unable to parse line '$line' in config file ($config)\n";
     exit(1);
   }
 }
@@ -256,7 +254,7 @@ ()
 close(CONFIG);
 
 if ($read_command != 1) {
-  print STDERR "queue.pl: config file ($config) does not contain the line \"command .*\"\n";
+  print STDERR "pbs.pl: config file ($config) does not contain the line \"command .*\"\n";
   exit(1);
 }
@@ -271,7 +269,7 @@ ()
       $qsub_opts .= "$cli_config_options{$option} ";
     } else {
       if ($opened_config_file == 0) { $config = "default config file"; }
-      die "queue.pl: Command line option $option not described in $config (or value '$value' not allowed)\n";
+      die "pbs.pl: Command line option $option not described in $config (or value '$value' not allowed)\n";
     }
   }
@@ -280,7 +278,7 @@ ()
 
 if ($array_job == 1 && $logfile !~ m/$jobname/
     && $jobend > $jobstart) {
-  print STDERR "queue.pl: you are trying to run a parallel job but "
+  print STDERR "pbs.pl: you are trying to run a parallel job but "
     . "you are putting the output into just one log file ($logfile)\n";
   exit(1);
 }
@@ -289,7 +287,7 @@ ()
 # Work out the command; quote escaping is done here.
 # Note: the rules for escaping stuff are worked out pretty
 # arbitrarily, based on what we want it to do.  Some things that
-# we pass as arguments to queue.pl, such as "|", we want to be
+# we pass as arguments to pbs.pl, such as "|", we want to be
 # interpreted by bash, so we don't escape them.  Other things,
 # such as archive specifiers like 'ark:gunzip -c foo.gz|', we want
 # to be passed, in quotes, to the Kaldi program.  Our heuristic
@@ -394,9 +392,9 @@ ()
 if ($ret != 0) {
   if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
     if (defined $jobname) { $logfile =~ s/\$PBS_ARRAY_INDEX/*/g; }
-    print STDERR "queue.pl: job writing to $logfile failed\n";
+    print STDERR "pbs.pl: job writing to $logfile failed\n";
   } else {
-    print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+    print STDERR "pbs.pl: error submitting jobs to queue (return status was $ret)\n";
     print STDERR "queue log file is $queue_logfile, command was $qsub_cmd\n";
     print STDERR `tail $queue_logfile`;
   }
@@ -501,13 +499,13 @@ ()
         # time elapsed between file modification and the start of this
         # program], then we assume the program really finished OK,
         # and maybe something is up with the file system.
-        print STDERR "**queue.pl: syncfile $f was not created but job seems\n" .
+        print STDERR "**pbs.pl: syncfile $f was not created but job seems\n" .
"**to have finished OK. Probably your file-system has problems.\n" . "**This is just a warning.\n"; last; } else { chop $last_line; - print STDERR "queue.pl: Error, unfinished job no " . + print STDERR "pbs.pl: Error, unfinished job no " . "longer exists, log is in $logfile, last line is '$last_line', " . "syncfile is $f, return status of qstat was $ret\n" . "Possible reasons: a) Exceeded time limit? -> Use more jobs!" . @@ -515,7 +513,7 @@ () exit(1); } } elsif ($ret != 0) { - print STDERR "queue.pl: Warning: qstat command returned status $ret (qstat -t $sge_job_id,$!)\n"; + print STDERR "pbs.pl: Warning: qstat command returned status $ret (qstat -t $sge_job_id,$!)\n"; } } } @@ -574,14 +572,14 @@ () else { # we failed. if (@logfiles == 1) { if (defined $jobname) { $logfile =~ s/\$PBS_ARRAY_INDEX/$jobstart/g; } - print STDERR "queue.pl: job failed with status $status, log is in $logfile\n"; + print STDERR "pbs.pl: job failed with status $status, log is in $logfile\n"; if ($logfile =~ m/JOB/) { - print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n"; + print STDERR "pbs.pl: probably you forgot to put JOB=1:\$nj in your script.\n"; } } else { if (defined $jobname) { $logfile =~ s/\$PBS_ARRAY_INDEX/*/g; } my $numjobs = 1 + $jobend - $jobstart; - print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n"; + print STDERR "pbs.pl: $num_failed / $numjobs failed, log is in $logfile\n"; } exit(1); }