From 5fe6cb224592cfd9590f6bd20ba3c5c2114a510c Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 17:05:34 -0400 Subject: [PATCH 01/67] minor change --- .../v1/local/chain/run_cnn_e2eali_1b.sh | 2 +- .../v1/local/chain/run_flatstart_cnn1a.sh | 2 +- egs/madcat_ar/v1/local/extract_features.sh | 4 + egs/madcat_ar/v1/local/process_data.py | 72 +-- egs/madcat_ar/v1/local/tl/augment_data.sh | 34 ++ ...eate_line_image_from_page_image.py.augment | 528 ++++++++++++++++++ .../v1/local/tl/imp/make_features.py | 170 ++++++ egs/madcat_ar/v1/local/tl/imp/process_data.py | 215 +++++++ .../v1/local/tl/imp/process_waldo_data.py | 62 ++ .../v1/local/tl/not_much_imp/prepare_data.sh | 49 ++ .../tl/not_much_imp/run_cnn_e2eali_1b.sh | 246 ++++++++ .../v1/local/tl/not_much_imp/run_end2end.sh | 125 +++++ .../tl/not_much_imp/run_flatstart_cnn1a.sh | 168 ++++++ .../v1/local/tl/not_much_imp/score.sh | 6 + .../v1/local/tl/run_textlocalization.sh | 128 +++++ egs/madcat_ar/v1/run_end2end.sh | 5 +- egs/wsj/s5/utils/lang/make_lexicon_fst.py | 2 +- 17 files changed, 1770 insertions(+), 48 deletions(-) create mode 100755 egs/madcat_ar/v1/local/tl/augment_data.sh create mode 100755 egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment create mode 100755 egs/madcat_ar/v1/local/tl/imp/make_features.py create mode 100755 egs/madcat_ar/v1/local/tl/imp/process_data.py create mode 100755 egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py create mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh create mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh create mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh create mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh create mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/score.sh create mode 100755 egs/madcat_ar/v1/local/tl/run_textlocalization.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh index 75c246f5ffe..55df0cad4b7 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh @@ -193,7 +193,7 @@ if [ $stage -le 5 ]; then --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh index 2c85e982ce6..4eea10a8441 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh @@ -33,7 +33,7 @@ num_jobs_final=16 minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 common_egs_dir= l2_regularize=0.00005 -frames_per_iter=1000000 +frames_per_iter=2000000 cmvn_opts="--norm-means=true --norm-vars=true" train_set=train lang_test=lang_test diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 70c5498626c..56a8443e328 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -1,7 +1,11 @@ #!/bin/bash + # Copyright 2017 Yiwen Shao # 2018 Ashish Arora +# Apache 2.0 +# This script runs the make features script in parallel. 
+ nj=4 cmd=run.pl feat_dim=40 diff --git a/egs/madcat_ar/v1/local/process_data.py b/egs/madcat_ar/v1/local/process_data.py index b57500cf2fa..920cb6f700b 100755 --- a/egs/madcat_ar/v1/local/process_data.py +++ b/egs/madcat_ar/v1/local/process_data.py @@ -42,6 +42,8 @@ help='Path to the downloaded (and extracted) writing conditions file 2') parser.add_argument('writing_condition3', type=str, help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") args = parser.parse_args() @@ -97,50 +99,40 @@ def check_writing_condition(wc_dict): Returns: (bool): True if writing condition matches. """ - return True - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True - return True - -def get_word_line_mapping(madcat_file_path): +def read_text(madcat_file_path): """ Maps every word in the page image to a corresponding line. Args: - madcat_file_path (string): complete path and name of the madcat xml file + madcat_file_path (string): complete path and name of the madcat xml file corresponding to the page image. Returns: + dict: Mapping every word in the page image to a corresponding line. """ + + word_line_dict = dict() doc = minidom.parse(madcat_file_path) zone = doc.getElementsByTagName('zone') for node in zone: line_id = node.getAttribute('id') - line_word_dict[line_id] = list() word_image = node.getElementsByTagName('token-image') for tnode in word_image: word_id = tnode.getAttribute('id') - line_word_dict[line_id].append(word_id) word_line_dict[word_id] = line_id - -def read_text(madcat_file_path): - """ Maps every word in the page image to a corresponding line. - Args: - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - Returns: - dict: Mapping every word in the page image to a corresponding line. 
- """ text_line_word_dict = dict() - doc = minidom.parse(madcat_file_path) segment = doc.getElementsByTagName('segment') for node in segment: token = node.getElementsByTagName('token') for tnode in token: ref_word_id = tnode.getAttribute('ref_id') word = tnode.getElementsByTagName('source')[0].firstChild.nodeValue - word = unicodedata.normalize('NFKC',word) ref_line_id = word_line_dict[ref_word_id] if ref_line_id not in text_line_word_dict: text_line_word_dict[ref_line_id] = list() @@ -160,7 +152,6 @@ def get_line_image_location(): ### main ### - print("Processing '{}' data...".format(args.out_dir)) text_file = os.path.join(args.out_dir, 'text') @@ -188,24 +179,19 @@ def get_line_image_location(): madcat_xml_path, image_file_path, wc_dict = check_file_location() if wc_dict is None or not check_writing_condition(wc_dict): continue - if madcat_xml_path is not None: - madcat_doc = minidom.parse(madcat_xml_path) - writer = madcat_doc.getElementsByTagName('writer') - writer_id = writer[0].getAttribute('id') - line_word_dict = dict() - word_line_dict = dict() - get_word_line_mapping(madcat_xml_path) - text_line_word_dict = read_text(madcat_xml_path) - base_name = os.path.basename(image_file_path) - base_name, b = base_name.split('.tif') - for lineID in sorted(text_line_word_dict): - updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' - location = image_loc_dict[updated_base_name] - image_file_path = os.path.join(location, updated_base_name) - line = text_line_word_dict[lineID] - text = ' '.join(line) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) - text_fh.write(utt_id + ' ' + text + '\n') - utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') - image_fh.write(utt_id + ' ' + image_file_path + '\n') - image_num += 1 + madcat_doc = minidom.parse(madcat_xml_path) + writer = madcat_doc.getElementsByTagName('writer') + writer_id = writer[0].getAttribute('id') + text_line_word_dict = read_text(madcat_xml_path) + base_name = os.path.basename(image_file_path).split('.tif')[0] + for lineID in sorted(text_line_word_dict): + updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' + location = image_loc_dict[updated_base_name] + image_file_path = os.path.join(location, updated_base_name) + line = text_line_word_dict[lineID] + text = ' '.join(line) + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh new file mode 100755 index 00000000000..31e4a8217ca --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/augment_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 +aug_set=aug1 +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" + +for set in $aug_set; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --fliplr false --augment true $datadir/augmentations/$set +done + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment b/egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment new file mode 100755 index 00000000000..da2b0f0a62f --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment @@ -0,0 +1,528 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# Apache 2.0 +# minimum bounding box part in this script is originally from +#https://github.com/BebeSparkelSparkel/MinimumBoundingBox +#https://startupnextdoor.com/computing-convex-hull-in-python/ +""" This module will be used for extracting line images from page image. + Given the word segmentation (bounding box around a word) for every word, it will + extract line segmentation. To extract line segmentation, it will take word bounding + boxes of a line as input, will create a minimum area bounding box that will contain + all corner points of word bounding boxes. The obtained bounding box (will not necessarily + be vertically or horizontally aligned). Hence to extract line image from line bounding box, + page image is rotated and line image is cropped and saved. +""" + +import sys +import argparse +import os +import xml.dom.minidom as minidom +import numpy as np +from math import atan2, cos, sin, pi, degrees, sqrt +from collections import namedtuple +import random +from scipy.spatial import ConvexHull +from PIL import Image +from scipy.misc import toimage +parser = argparse.ArgumentParser(description="Creates line images from page image", + epilog="E.g. 
" + sys.argv[0] + " data/LDC2012T15" + " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " + " data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('database_path1', type=str, + help='Path to the downloaded madcat data directory 1') +parser.add_argument('database_path2', type=str, + help='Path to the downloaded madcat data directory 2') +parser.add_argument('database_path3', type=str, + help='Path to the downloaded madcat data directory 3') +parser.add_argument('data_splits', type=str, + help='Path to file that contains the train/test/dev split information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files') +parser.add_argument('writing_condition1', type=str, + help='Path to the downloaded (and extracted) writing conditions file 1') +parser.add_argument('writing_condition2', type=str, + help='Path to the downloaded (and extracted) writing conditions file 2') +parser.add_argument('writing_condition3', type=str, + help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument('--padding', type=int, default=400, + help='padding across horizontal/verticle direction') +parser.add_argument('--pixel-scaling', type=int, default=30, + help='padding across horizontal/verticle direction') +args = parser.parse_args() + +""" +bounding_box is a named tuple which contains: + area (float): area of the rectangle + length_parallel (float): length of the side that is parallel to unit_vector + length_orthogonal (float): length of the side that is orthogonal to unit_vector + rectangle_center(int, int): coordinates of the rectangle center + (use rectangle_corners to get the corner points of the rectangle) + unit_vector (float, float): direction of the length_parallel side. + (it's orthogonal vector can be found with the orthogonal_vector function + unit_vector_angle (float): angle of the unit vector to be in radians. + corner_points [(float, float)]: set that contains the corners of the rectangle +""" + +bounding_box_tuple = namedtuple('bounding_box_tuple', 'area ' + 'length_parallel ' + 'length_orthogonal ' + 'rectangle_center ' + 'unit_vector ' + 'unit_vector_angle ' + 'corner_points' + ) + + +def unit_vector(pt0, pt1): + """ Given two points pt0 and pt1, return a unit vector that + points in the direction of pt0 to pt1. + Returns + ------- + (float, float): unit vector + """ + dis_0_to_1 = sqrt((pt0[0] - pt1[0])**2 + (pt0[1] - pt1[1])**2) + return (pt1[0] - pt0[0]) / dis_0_to_1, \ + (pt1[1] - pt0[1]) / dis_0_to_1 + + +def orthogonal_vector(vector): + """ Given a vector, returns a orthogonal/perpendicular vector of equal length. + Returns + ------ + (float, float): A vector that points in the direction orthogonal to vector. + """ + return -1 * vector[1], vector[0] + + +def bounding_area(index, hull): + """ Given index location in an array and convex hull, it gets two points + hull[index] and hull[index+1]. From these two points, it returns a named + tuple that mainly contains area of the box that bounds the hull. This + bounding box orintation is same as the orientation of the lines formed + by the point hull[index] and hull[index+1]. + Returns + ------- + a named tuple that contains: + area: area of the rectangle + length_parallel: length of the side that is parallel to unit_vector + length_orthogonal: length of the side that is orthogonal to unit_vector + rectangle_center: coordinates of the rectangle center + unit_vector: direction of the length_parallel side. 
+ (it's orthogonal vector can be found with the orthogonal_vector function) + """ + unit_vector_p = unit_vector(hull[index], hull[index+1]) + unit_vector_o = orthogonal_vector(unit_vector_p) + + dis_p = tuple(np.dot(unit_vector_p, pt) for pt in hull) + dis_o = tuple(np.dot(unit_vector_o, pt) for pt in hull) + + min_p = min(dis_p) + min_o = min(dis_o) + len_p = max(dis_p) - min_p + len_o = max(dis_o) - min_o + + return {'area': len_p * len_o, + 'length_parallel': len_p, + 'length_orthogonal': len_o, + 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'unit_vector': unit_vector_p, + } + + +def to_xy_coordinates(unit_vector_angle, point): + """ Given angle from horizontal axis and a point from origin, + returns converted unit vector coordinates in x, y coordinates. + angle of unit vector should be in radians. + Returns + ------ + (float, float): converted x,y coordinate of the unit vector. + """ + angle_orthogonal = unit_vector_angle + pi / 2 + return point[0] * cos(unit_vector_angle) + point[1] * cos(angle_orthogonal), \ + point[0] * sin(unit_vector_angle) + point[1] * sin(angle_orthogonal) + + +def rotate_points(center_of_rotation, angle, points): + """ Rotates a point cloud around the center_of_rotation point by angle + input + ----- + center_of_rotation (float, float): angle of unit vector to be in radians. + angle (float): angle of rotation to be in radians. + points [(float, float)]: Points to be a list or tuple of points. Points to be rotated. + Returns + ------ + [(float, float)]: Rotated points around center of rotation by angle + """ + rot_points = [] + ang = [] + for pt in points: + diff = tuple([pt[d] - center_of_rotation[d] for d in range(2)]) + diff_angle = atan2(diff[1], diff[0]) + angle + ang.append(diff_angle) + diff_length = sqrt(sum([d**2 for d in diff])) + rot_points.append((center_of_rotation[0] + diff_length * cos(diff_angle), + center_of_rotation[1] + diff_length * sin(diff_angle))) + + return rot_points + + +def rectangle_corners(rectangle): + """ Given rectangle center and its inclination, returns the corner + locations of the rectangle. + Returns + ------ + [(float, float)]: 4 corner points of rectangle. + """ + corner_points = [] + for i1 in (.5, -.5): + for i2 in (i1, -1 * i1): + corner_points.append((rectangle['rectangle_center'][0] + i1 * rectangle['length_parallel'], + rectangle['rectangle_center'][1] + i2 * rectangle['length_orthogonal'])) + + return rotate_points(rectangle['rectangle_center'], rectangle['unit_vector_angle'], corner_points) + + +def minimum_bounding_box(points): + """ Given a list of 2D points, it returns the minimum area rectangle bounding all + the points in the point cloud. + Returns + ------ + returns a namedtuple that contains: + area: area of the rectangle + length_parallel: length of the side that is parallel to unit_vector + length_orthogonal: length of the side that is orthogonal to unit_vector + rectangle_center: coordinates of the rectangle center + unit_vector: direction of the length_parallel side. 
RADIANS + unit_vector_angle: angle of the unit vector + corner_points: set that contains the corners of the rectangle + """ + + if len(points) <= 2: raise ValueError('More than two points required.') + + hull_ordered = [points[index] for index in ConvexHull(points).vertices] + hull_ordered.append(hull_ordered[0]) + hull_ordered = tuple(hull_ordered) + + min_rectangle = bounding_area(0, hull_ordered) + for i in range(1, len(hull_ordered)-1): + rectangle = bounding_area(i, hull_ordered) + if rectangle['area'] < min_rectangle['area']: + min_rectangle = rectangle + + min_rectangle['unit_vector_angle'] = atan2(min_rectangle['unit_vector'][1], min_rectangle['unit_vector'][0]) + min_rectangle['rectangle_center'] = to_xy_coordinates(min_rectangle['unit_vector_angle'], min_rectangle['rectangle_center']) + + return bounding_box_tuple( + area = min_rectangle['area'], + length_parallel = min_rectangle['length_parallel'], + length_orthogonal = min_rectangle['length_orthogonal'], + rectangle_center = min_rectangle['rectangle_center'], + unit_vector = min_rectangle['unit_vector'], + unit_vector_angle = min_rectangle['unit_vector_angle'], + corner_points = set(rectangle_corners(min_rectangle)) + ) + + +def get_center(im): + """ Given image, returns the location of center pixel + Returns + ------- + (int, int): center of the image + """ + center_x = im.size[0] / 2 + center_y = im.size[1] / 2 + return int(center_x), int(center_y) + + +def get_horizontal_angle(unit_vector_angle): + """ Given an angle in radians, returns angle of the unit vector in + first or fourth quadrant. + Returns + ------ + (float): updated angle of the unit vector to be in radians. + It is only in first or fourth quadrant. + """ + if unit_vector_angle > pi / 2 and unit_vector_angle <= pi: + unit_vector_angle = unit_vector_angle - pi + elif unit_vector_angle > -pi and unit_vector_angle < -pi / 2: + unit_vector_angle = unit_vector_angle + pi + + return unit_vector_angle + + +def get_smaller_angle(bounding_box): + """ Given a rectangle, returns its smallest absolute angle from horizontal axis. + Returns + ------ + (float): smallest angle of the rectangle to be in radians. + """ + unit_vector = bounding_box.unit_vector + unit_vector_angle = bounding_box.unit_vector_angle + ortho_vector = orthogonal_vector(unit_vector) + ortho_vector_angle = atan2(ortho_vector[1], ortho_vector[0]) + + unit_vector_angle_updated = get_horizontal_angle(unit_vector_angle) + ortho_vector_angle_updated = get_horizontal_angle(ortho_vector_angle) + + if abs(unit_vector_angle_updated) < abs(ortho_vector_angle_updated): + return unit_vector_angle_updated + else: + return ortho_vector_angle_updated + + +def rotated_points(bounding_box, center): + """ Given the rectangle, returns corner points of rotated rectangle. + It rotates the rectangle around the center by its smallest angle. + Returns + ------- + [(int, int)]: 4 corner points of rectangle. 
+ """ + p1, p2, p3, p4 = bounding_box.corner_points + x1, y1 = p1 + x2, y2 = p2 + x3, y3 = p3 + x4, y4 = p4 + center_x, center_y = center + rotation_angle_in_rad = -get_smaller_angle(bounding_box) + x_dash_1 = (x1 - center_x) * cos(rotation_angle_in_rad) - (y1 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_2 = (x2 - center_x) * cos(rotation_angle_in_rad) - (y2 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_3 = (x3 - center_x) * cos(rotation_angle_in_rad) - (y3 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_4 = (x4 - center_x) * cos(rotation_angle_in_rad) - (y4 - center_y) * sin(rotation_angle_in_rad) + center_x + + y_dash_1 = (y1 - center_y) * cos(rotation_angle_in_rad) + (x1 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_2 = (y2 - center_y) * cos(rotation_angle_in_rad) + (x2 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_3 = (y3 - center_y) * cos(rotation_angle_in_rad) + (x3 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_4 = (y4 - center_y) * cos(rotation_angle_in_rad) + (x4 - center_x) * sin(rotation_angle_in_rad) + center_y + return x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 + + +def pad_image(image): + """ Given an image, returns a padded image around the border. + This routine save the code from crashing if bounding boxes that are + slightly outside the page boundary. + Returns + ------- + image: page image + """ + offset = int(args.padding // 2) + padded_image = Image.new('RGB', (image.size[0] + int(args.padding), image.size[1] + int(args.padding)), "white") + padded_image.paste(im = image, box = (offset, offset)) + return padded_image + + +def update_minimum_bounding_box_input(bounding_box_input): + """ Given list of 2D points, returns list of 2D points shifted by an offset. + Returns + ------ + points [(float, float)]: points, a list or tuple of 2D coordinates + """ + updated_minimum_bounding_box_input = [] + offset = int(args.padding // 2) + for point in bounding_box_input: + x, y = point + new_x = x + offset + new_y = y + offset + word_coordinate = (new_x, new_y) + updated_minimum_bounding_box_input.append(word_coordinate) + + return updated_minimum_bounding_box_input + + +def dilate_polygon(points, amount_increase): + """ Increases size of polygon given as a list of tuples. Assumes points in polygon are given in CCW + """ + expanded_points = [] + for index, point in enumerate(points): + prev_point = points[(index - 1) % len(points)] + next_point = points[(index + 1) % len(points)] + prev_edge = np.subtract(point, prev_point) + next_edge = np.subtract(next_point, point) + + prev_normal = ((1 * prev_edge[1]), (-1 * prev_edge[0])) + prev_normal = np.divide(prev_normal, np.linalg.norm(prev_normal)) + next_normal = ((1 * next_edge[1]), (-1 * next_edge[0])) + next_normal = np.divide(next_normal, np.linalg.norm(next_normal)) + + bisect = np.add(prev_normal, next_normal) + bisect = np.divide(bisect, np.linalg.norm(bisect)) + + cos_theta = np.dot(next_normal, bisect) + hyp = amount_increase / cos_theta + + new_point = np.around(point + hyp * bisect) + new_point = new_point.astype(int) + new_point = new_point.tolist() + new_point = tuple(new_point) + expanded_points.append(new_point) + return expanded_points + + +def set_line_image_data(image, line_id, image_file_name, image_fh): + """ Given an image, saves a flipped line image. Line image file name + is formed by appending the line id at the end page image name. 
+ """ + + base_name = os.path.splitext(os.path.basename(image_file_name))[0] + line_id = '_' + line_id.zfill(4) + line_image_file_name = base_name + line_id + '.png' + image_path = os.path.join(args.out_dir, line_image_file_name) + imgray = image.convert('L') + imgray_rev_arr = np.fliplr(imgray) + imgray_rev = toimage(imgray_rev_arr) + imgray_rev.save(image_path) + image_fh.write(image_path + '\n') + + +def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh): + """ Given a page image, extracts the line images from it. + Input + ----- + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. + """ + im_wo_pad = Image.open(image_file_name) + im = pad_image(im_wo_pad) + doc = minidom.parse(madcat_file_path) + zone = doc.getElementsByTagName('zone') + for node in zone: + id = node.getAttribute('id') + token_image = node.getElementsByTagName('token-image') + minimum_bounding_box_input = [] + for token_node in token_image: + word_point = token_node.getElementsByTagName('point') + for word_node in word_point: + word_coordinate = (int(word_node.getAttribute('x')), int(word_node.getAttribute('y'))) + minimum_bounding_box_input.append(word_coordinate) + updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) + points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] + for i in range(0, 3): + additional_pixel = random.randint(1, args.pixel_scaling) + mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) + bounding_box = minimum_bounding_box(mar) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + cropped_bounding_box, get_center(region_initial)) + + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + line_id = id + '_scale' + str(i) + set_line_image_data(region_final, line_id, image_file_name, image_fh) + + +def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): + """ Returns the complete path of the page image and corresponding + xml file. + Returns + ------- + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. 
+ """ + madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') + madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') + madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') + + image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') + image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') + image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') + + if os.path.exists(madcat_file_path1): + return madcat_file_path1, image_file_path1, wc_dict1 + + if os.path.exists(madcat_file_path2): + return madcat_file_path2, image_file_path2, wc_dict2 + + if os.path.exists(madcat_file_path3): + return madcat_file_path3, image_file_path3, wc_dict3 + + return None, None, None + + +def parse_writing_conditions(writing_conditions): + """ Given writing condition file path, returns a dictionary which have writing condition + of each page image. + Returns + ------ + (dict): dictionary with key as page image name and value as writing condition. + """ + with open(writing_conditions) as f: + file_writing_cond = dict() + for line in f: + line_list = line.strip().split("\t") + file_writing_cond[line_list[0]] = line_list[3] + return file_writing_cond + + +def check_writing_condition(wc_dict, base_name): + """ Given writing condition dictionary, checks if a page image is writing + in a specifed writing condition. + It is used to create subset of dataset based on writing condition. + Returns + (bool): True if writing condition matches. + """ + #return True + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + + return True + + +### main ### + +def main(): + + wc_dict1 = parse_writing_conditions(args.writing_condition1) + wc_dict2 = parse_writing_conditions(args.writing_condition2) + wc_dict3 = parse_writing_conditions(args.writing_condition3) + output_directory = args.out_dir + image_file = os.path.join(output_directory, 'images.scp') + image_fh = open(image_file, 'w', encoding='utf-8') + + splits_handle = open(args.data_splits, 'r') + splits_data = splits_handle.read().strip().split('\n') + prev_base_name = '' + for line in splits_data: + base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0] + if prev_base_name != base_name: + prev_base_name = base_name + madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) + if wc_dict is None or not check_writing_condition(wc_dict, base_name): + continue + if madcat_file_path is not None: + get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) + + +if __name__ == '__main__': + main() + diff --git a/egs/madcat_ar/v1/local/tl/imp/make_features.py b/egs/madcat_ar/v1/local/tl/imp/make_features.py new file mode 100755 index 00000000000..e9d10ecc87e --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/imp/make_features.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2018 Hossein Hadian + +""" This script converts images to Kaldi-format feature matrices. The input to + this script is the path to a data directory, e.g. "data/train". This script + reads the images listed in images.scp and writes them to standard output + (by default) as Kaldi-formatted matrices (in text form). It also scales the + images so they have the same height (via --feat-dim). 
It can optionally pad + the images (on left/right sides) with white pixels. + If an 'image2num_frames' file is found in the data dir, it will be used + to enforce the images to have the specified length in that file by padding + white pixels (the --padding option will be ignored in this case). This relates + to end2end chain training. + + eg. local/make_features.py data/train --feat-dim 40 +""" +import random +import argparse +import os +import sys +import numpy as np +from scipy import misc +import math + +parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and + writes them to standard output in text format.""") +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') +parser.add_argument('--out-ark', type=str, default='-', + help='Where to write the output feature file') +parser.add_argument('--feat-dim', type=int, default=40, + help='Size to scale the height of all images') +parser.add_argument('--padding', type=int, default=5, + help='Number of white pixels to pad on the left' + 'and right side of the image.') +parser.add_argument('--vertical-shift', type=int, default=16, + help='total number of padding pixel per column') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +args = parser.parse_args() + + +def write_kaldi_matrix(file_handle, matrix, key): + file_handle.write(key + " [ ") + num_rows = len(matrix) + if num_rows == 0: + raise Exception("Matrix is empty") + num_cols = len(matrix[0]) + + for row_index in range(len(matrix)): + if num_cols != len(matrix[row_index]): + raise Exception("All the rows of a matrix are expected to " + "have the same length") + file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + if row_index != num_rows - 1: + file_handle.write("\n") + file_handle.write(" ]\n") + + +def get_scaled_image(im): + scale_size = args.feat_dim + sx = im.shape[1] # width + sy = im.shape[0] # height + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + im = misc.imresize(im, (nx, ny)) + return im + + +def horizontal_pad(im, allowed_lengths = None): + if allowed_lengths is None: + left_padding = right_padding = args.padding + else: # Find an allowed length for the image + imlen = im.shape[1] # width + allowed_len = 0 + for l in allowed_lengths: + if l > imlen: + allowed_len = l + break + if allowed_len == 0: + # No allowed length was found for the image (the image is too long) + return None + padding = allowed_len - imlen + left_padding = int(padding // 2) + right_padding = padding - left_padding + dim_y = im.shape[0] # height + im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), + dtype=int), im), axis=1) + im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), + dtype=int)), axis=1) + return im_pad1 + +def vertical_shift(im, mode='mid'): + total = args.vertical_shift + if mode == 'notmid': + val = random.randint(0, 1) + if val == 0: + mode = 'top' + else: + mode = 'bottom' + if mode == 'mid': + top = int(total / 2) + bottom = total - top + elif mode == 'top': # more padding on top + top = random.randint(total / 2, total) + bottom = total - top + elif mode == 'bottom': # more padding on bottom + top = random.randint(0, total / 2) + bottom = total - top + width = im.shape[1] + 
im_pad = np.concatenate(
+        (255 * np.ones((top, width), dtype=int) -
+         np.random.normal(2, 1, (top, width)).astype(int), im), axis=0)
+    im_pad = np.concatenate(
+        (im_pad, 255 * np.ones((bottom, width), dtype=int) -
+         np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0)
+    return im_pad
+
+### main ###
+random.seed(1)
+data_list_path = args.images_scp_path
+if args.out_ark == '-':
+    out_fh = sys.stdout
+else:
+    out_fh = open(args.out_ark,'w')
+
+allowed_lengths = None
+allowed_len_handle = args.allowed_len_file_path
+if os.path.isfile(allowed_len_handle):
+    print("Found 'allowed_lengths.txt' file...", file=sys.stderr)
+    allowed_lengths = []
+    with open(allowed_len_handle) as f:
+        for line in f:
+            allowed_lengths.append(int(line.strip()))
+    print("Read {} allowed lengths and will apply them to the "
+          "features.".format(len(allowed_lengths)), file=sys.stderr)
+
+num_fail = 0
+num_ok = 0
+aug_setting = ['mid', 'notmid']
+with open(data_list_path) as f:
+    for line in f:
+        line = line.strip()
+        line_vect = line.split(' ')
+        image_id = line_vect[0]
+        image_path = line_vect[1]
+        im = misc.imread(image_path)
+        im_scaled = get_scaled_image(im)
+        im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths)
+        if im_horizontal_padded is None:
+            num_fail += 1
+            continue
+        if args.augment:
+            im_shift = vertical_shift(im_horizontal_padded, aug_setting[1])
+        else:
+            im_shift = vertical_shift(im_horizontal_padded, aug_setting[0])
+        data = np.transpose(im_shift, (1, 0))
+        data = np.divide(data, 255.0)
+        num_ok += 1
+        write_kaldi_matrix(out_fh, data, image_id)
+
+print('Generated features for {} images. Failed for {} (image too '
+      'long).'.format(num_ok, num_fail), file=sys.stderr)
diff --git a/egs/madcat_ar/v1/local/tl/imp/process_data.py b/egs/madcat_ar/v1/local/tl/imp/process_data.py
new file mode 100755
index 00000000000..c21beb1be70
--- /dev/null
+++ b/egs/madcat_ar/v1/local/tl/imp/process_data.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+
+# Copyright 2018 Ashish Arora
+
+""" This script reads MADCAT files and creates the following files (for the
+    data subset selected via --dataset) :text, utt2spk, images.scp.
+    Eg. local/process_data.py data/local /export/corpora/LDC/LDC2012T15 /export/corpora/LDC/LDC2013T09
+    /export/corpora/LDC/LDC2013T15 data/download/data_splits/madcat.train.raw.lineid
+    data/dev data/local/lines/images.scp
+    Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 وجه وعقل غارق حتّى النخاع
+    utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001
+    images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0
+    data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif
+"""
+
+import argparse
+import os
+import sys
+import xml.dom.minidom as minidom
+import unicodedata
+
+parser = argparse.ArgumentParser(description="Creates text, utt2spk and images.scp files",
+                                 epilog="E.g. 
" + sys.argv[0] + " data/LDC2012T15" + " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " + " data/train data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('database_path1', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('database_path2', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('database_path3', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('data_splits', type=str, + help='Path to file that contains the train/test/dev split information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files.') +parser.add_argument('images_scp_path', type=str, + help='Path of input images.scp file(maps line image and location)') +parser.add_argument('writing_condition1', type=str, + help='Path to the downloaded (and extracted) writing conditions file 1') +parser.add_argument('writing_condition2', type=str, + help='Path to the downloaded (and extracted) writing conditions file 2') +parser.add_argument('writing_condition3', type=str, + help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") +args = parser.parse_args() + + +def check_file_location(): + """ Returns the complete path of the page image and corresponding + xml file. + Args: + Returns: + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. + """ + madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') + madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') + madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') + + image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') + image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') + image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') + + if os.path.exists(madcat_file_path1): + return madcat_file_path1, image_file_path1, wc_dict1 + + if os.path.exists(madcat_file_path2): + return madcat_file_path2, image_file_path2, wc_dict2 + + if os.path.exists(madcat_file_path3): + return madcat_file_path3, image_file_path3, wc_dict3 + + return None, None, None + + +def parse_writing_conditions(writing_conditions): + """ Returns a dictionary which have writing condition of each page image. + Args: + writing_conditions(string): complete path of writing condition file. + Returns: + (dict): dictionary with key as page image name and value as writing condition. + """ + with open(writing_conditions) as f: + file_writing_cond = dict() + for line in f: + line_list = line.strip().split("\t") + file_writing_cond[line_list[0]] = line_list[3] + return file_writing_cond + + +def check_writing_condition(wc_dict): + """ Checks if a given page image is writing in a given writing condition. + It is used to create subset of dataset based on writing condition. + Args: + wc_dict (dict): dictionary with key as page image name and value as writing condition. 
+    Returns:
+        (bool): True if writing condition matches.
+    """
+    if not args.subset:
+        return True
+    writing_condition = wc_dict[base_name].strip()
+    if writing_condition != 'IUC':
+        return False
+    return True
+
+
+def read_text(madcat_file_path):
+    """ Maps every word in the page image to a corresponding line.
+    Args:
+        madcat_file_path (string): complete path and name of the madcat xml file
+                                   corresponding to the page image.
+    Returns:
+        dict: Mapping every word in the page image to a corresponding line.
+    """
+
+    word_line_dict = dict()
+    doc = minidom.parse(madcat_file_path)
+    zone = doc.getElementsByTagName('zone')
+    for node in zone:
+        line_id = node.getAttribute('id')
+        word_image = node.getElementsByTagName('token-image')
+        for tnode in word_image:
+            word_id = tnode.getAttribute('id')
+            word_line_dict[word_id] = line_id
+
+    text_line_word_dict = dict()
+    segment = doc.getElementsByTagName('segment')
+    for node in segment:
+        token = node.getElementsByTagName('token')
+        for tnode in token:
+            ref_word_id = tnode.getAttribute('ref_id')
+            word = tnode.getElementsByTagName('source')[0].firstChild.nodeValue
+            ref_line_id = word_line_dict[ref_word_id]
+            if ref_line_id not in text_line_word_dict:
+                text_line_word_dict[ref_line_id] = list()
+            text_line_word_dict[ref_line_id].append(word)
+    return text_line_word_dict
+
+
+def get_line_image_location():
+    image_loc_dict = dict() # Stores image base name and location
+    image_loc_vect = input_image_fh.read().strip().split("\n")
+    for line in image_loc_vect:
+        base_name = os.path.basename(line)
+        location_vect = line.split('/')
+        location = "/".join(location_vect[:-1])
+        image_loc_dict[base_name]=location
+    return image_loc_dict
+
+
+### main ###
+print("Processing '{}' data...".format(args.out_dir))
+
+text_file = os.path.join(args.out_dir, 'text')
+text_fh = open(text_file, 'w', encoding='utf-8')
+utt2spk_file = os.path.join(args.out_dir, 'utt2spk')
+utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8')
+image_file = os.path.join(args.out_dir, 'images.scp')
+image_fh = open(image_file, 'w', encoding='utf-8')
+
+input_image_file = args.images_scp_path
+input_image_fh = open(input_image_file, 'r', encoding='utf-8')
+
+wc_dict1 = parse_writing_conditions(args.writing_condition1)
+wc_dict2 = parse_writing_conditions(args.writing_condition2)
+wc_dict3 = parse_writing_conditions(args.writing_condition3)
+image_loc_dict = get_line_image_location()
+
+image_num = 0
+with open(args.data_splits) as f:
+    prev_base_name = ''
+    for line in f:
+        base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0]
+        if prev_base_name != base_name:
+            prev_base_name = base_name
+            madcat_xml_path, image_file_path, wc_dict = check_file_location()
+            if wc_dict is None or not check_writing_condition(wc_dict):
+                continue
+            madcat_doc = minidom.parse(madcat_xml_path)
+            writer = madcat_doc.getElementsByTagName('writer')
+            writer_id = writer[0].getAttribute('id')
+            text_line_word_dict = read_text(madcat_xml_path)
+            base_name = os.path.basename(image_file_path).split('.tif')[0]
+            for line_id in sorted(text_line_word_dict):
+                if args.augment:
+                    key = (line_id + '.')[:-1]
+                    for i in range(0, 3):
+                        location_id = '_' + line_id + '_scale' + str(i)
+                        line_image_file_name = base_name + location_id + '.png'
+                        location = image_loc_dict[line_image_file_name]
+                        image_file_path = os.path.join(location, line_image_file_name)
+                        line = text_line_word_dict[key]
+                        text = ' '.join(line)
+                        base_line_image_file_name = line_image_file_name.split('.png')[0]
+                        utt_id = writer_id + '_' + 
str(image_num).zfill(6) + '_' + base_line_image_file_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 + else: + updated_base_name = base_name + '_' + str(line_id).zfill(4) +'.png' + location = image_loc_dict[updated_base_name] + image_file_path = os.path.join(location, updated_base_name) + line = text_line_word_dict[line_id] + text = ' '.join(line) + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(line_id).zfill(4) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 diff --git a/egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py b/egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py new file mode 100755 index 00000000000..df8b6c5149f --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import argparse +import os +import sys + +parser = argparse.ArgumentParser(description="Creates text, utt2spk and images.scp files", + epilog="E.g. " + sys.argv[0] + " data/train data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('image_transcription_file', type=str, + help='Path to the file containing line image path and transcription information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files.') +args = parser.parse_args() + + +def read_image_text(image_text_path): + """ Given the file path containing, mapping information of line image + and transcription, it returns a dict. The dict contains this mapping + info. It can be accessed via line_id and will provide transcription. 
+ Returns: + -------- + dict: line_id and transcription mapping + """ + image_transcription_dict = dict() + with open(image_text_path, encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + image_path = line_vect[0] + line_id = os.path.basename(image_path).split('.png')[0] + transcription = line_vect[1:] + #transcription = " ".join(transcription) + #image_transcription_dict[line_id] = transcription + joined_transcription = list() + for word in transcription: + joined_transcription.append(word) + joined_transcription = " ".join(joined_transcription) + image_transcription_dict[line_id] = joined_transcription + return image_transcription_dict + + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +image_transcription_dict = read_image_text(args.image_transcription_file) +for line_id in image_transcription_dict: + writer_id = line_id.strip().split('_')[-3] + updated_line_id = line_id + '.png' + image_file_path = os.path.join('lines', updated_line_id) + text = image_transcription_dict[line_id] + utt_id = line_id + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh new file mode 100755 index 00000000000..5fe41e7cf4c --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 + +# This script prepares the training and test data for MADCAT Arabic dataset +# (i.e text, images.scp, utt2spk and spk2utt). It calls process_data.py. + +# Eg. local/prepare_data.sh +# Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 ﻮﺠﻫ ﻮﻌﻘﻟ ﻍﺍﺮﻗ ﺢﺗّﻯ ﺎﻠﻨﺧﺎﻋ +# utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 +# images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 +# data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif + +stage=0 +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits +images_scp_dir=data/local + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +mkdir -p data/{train,test,dev} + +if [ $stage -le 1 ]; then + echo "$0: Processing dev, train and test data...$(date)" + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.dev.raw.lineid data/dev $images_scp_dir/dev/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 + + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.train.raw.lineid data/train $images_scp_dir/train/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 + + local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + + for dataset in dev test train; do + echo "$0: Fixing data directory for dataset: $dataset." + image/fix_data_dir.sh data/$dataset + done +fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh new file mode 100755 index 00000000000..f44b12667e9 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the +# lattice alignments and to build a tree + +# local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b +# System cnn_e2eali_1b +# WER 10.78 +# CER 2.99 +# Final train prob -0.0587 +# Final valid prob -0.0609 +# Final train prob (xent) -0.4471 +# Final valid prob (xent) -0.4653 +# Parameters 3.37M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b +#exp/chain/cnn_e2eali_1b: num-iters=179 nj=8..16 num-params=3.4M dim=40->416 combine=-0.058->-0.058 (over 3) xent:train/valid[118,178,final]=(-0.463,-0.445,-0.447/-0.477,-0.462,-0.465) logprob:train/valid[118,178,final]=(-0.062,-0.059,-0.059/-0.063,-0.061,-0.061) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=450 +# training options +srand=0 +remove_egs=true +lang_test=lang_test +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts + +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=56 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=2 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh new file mode 100755 index 00000000000..126d1d56c8f --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian +# 2018 Ashish Arora +set -e +stage=0 +nj=30 +# download_dir{1,2,3} points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# This corpus can be purchased here: +# https://catalog.ldc.upenn.edu/LDC2012T15, +# https://catalog.ldc.upenn.edu/LDC2013T09/, +# https://catalog.ldc.upenn.edu/LDC2013T15/. 
+download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. +mkdir -p data/{train,test,dev}/data +mkdir -p data/local/{train,test,dev} + +if [ $stage -le 0 ]; then + echo "$0: Downloading data splits..." + echo "Date: $(date)." + local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ + --download_dir2 $download_dir2 --download_dir3 $download_dir3 +fi + +if [ $stage -le 1 ]; then + for dataset in dev train; do + data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ + --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ + --data data/local/$dataset + done +fi + +if [ $stage -le 2 ]; then + echo "$0: Preparing data..." + local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --images_scp_dir data/local \ + --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 +fi + +if [ $stage -le 3 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames" + echo "Date: $(date)." + image/get_image2num_frames.py data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + echo "$0: Obtaining image groups. calling get_allowed_lengths" + echo "Date: $(date)." + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train +fi + +if [ $stage -le 4 ]; then + # for dataset in train dev; do + # echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " + # echo "Date: $(date)." + # local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset + # steps/compute_cmvn_stats.sh data/$dataset || exit 1; + # done + # echo "$0: Fixing data directory for train dataset $(date)." + # utils/fix_data_dir.sh data/train + + local/make_features.py data/test/images.scp --feat-dim 40 \ + --allowed_len_file_path data/test/allowed_lengths.txt --no-augment | \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:data/test/data/images.ark,data/test/feats.scp +fi + +if [ $stage -le 5 ]; then + echo "$0: Preparing dictionary and lang..." 
+ cut -d' ' -f2- data/train/text | python3 local/reverse.py | python3 local/prepend_words.py | python3 utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + for set in test train dev ; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | python3 local/reverse.py | python3 local/prepend_words.py | python3 utils/lang/bpe/apply_bpe.py -c data/train/bpe.out | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + done + local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 6 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + data/local/dict/lexicon.txt data/lang_test +fi + +if [ $stage -le 7 ]; then + echo "$0: Calling the flat-start chain recipe..." + echo "Date: $(date)." + local/chain/run_flatstart_cnn1a.sh --nj $nj +fi + +if [ $stage -le 8 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + echo "Date: $(date)." + steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +if [ $stage -le 9 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments..." + echo "Date: $(date)." + local/chain/run_cnn_e2eali_1b.sh --nj $nj --stage 2 +fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh new file mode 100755 index 00000000000..4893dcfea08 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. from scratch) + +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# System e2e_cnn_1a +# WER 10.71 +# CER 2.85 +# Final train prob -0.0859 +# Final valid prob -0.1266 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 2.94M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ +# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) + +set -e + + +# configs for 'chain' +stage=0 +nj=30 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +num_epochs=2 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=1000000 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train +lang_test=lang_test + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + --shared-phones true \ + --type mono \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=56 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
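
An illustrative aside on the xconfig above (not part of the recipe): each conv-relu-batchnorm-layer's height-in must equal the previous layer's height-out, and the height halves whenever height-subsample-out=2, taking the 56-pixel input down to 28 and then 14. A minimal Python sketch of that bookkeeping:

    # Purely illustrative: track the height dimension through cnn1..cnn7 as
    # declared in the xconfig above; the entries of 2 are the two layers that
    # set height-subsample-out=2.
    height = 56
    for subsample in (1, 2, 1, 1, 2, 1, 1):
        height //= subsample
        print(height)   # 56, 28, 28, 28, 14, 14, 14
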
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh new file mode 100755 index 00000000000..1d84815fc69 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh @@ -0,0 +1,6 @@ + +#!/bin/bash + + +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh new file mode 100755 index 00000000000..5d27476d3e1 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian +# 2018 Ashish Arora +set -e +stage=0 +nj=70 +# download_dir{1,2,3} points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# This corpus can be purchased here: +# https://catalog.ldc.upenn.edu/LDC2012T15, +# https://catalog.ldc.upenn.edu/LDC2013T09/, +# https://catalog.ldc.upenn.edu/LDC2013T15/. +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. 
./utils/parse_options.sh # e.g. this parses the above options + # if supplied. +./local/check_tools.sh + +mkdir -p data/{train,test,dev}/data +mkdir -p data/local/{train,test,dev} + +if [ $stage -le 0 ]; then + echo "$0: Downloading data splits..." + echo "Date: $(date)." + local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ + --download_dir2 $download_dir2 --download_dir3 $download_dir3 +fi + +if [ $stage -le 1 ]; then + for dataset in test train dev; do + data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ + --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ + --data data/local/$dataset + done +fi + +if [ $stage -le 2 ]; then + echo "$0: Preparing data..." + local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --images_scp_dir data/local \ + --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 +fi + +if [ $stage -le 3 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames" + echo "Date: $(date)." + image/get_image2num_frames.py data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + echo "$0: Obtaining image groups. calling get_allowed_lengths" + echo "Date: $(date)." + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train +fi + +if [ $stage -le 4 ]; then + for dataset in test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " + echo "Date: $(date)." + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset + steps/compute_cmvn_stats.sh data/$dataset || exit 1; + done + echo "$0: Fixing data directory for train dataset" + echo "Date: $(date)." + utils/fix_data_dir.sh data/train +fi + +if [ $stage -le 5 ]; then + echo "$0: Preparing dictionary and lang..." + cut -d' ' -f2- data/train/text | local/reverse.py | \ + local/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + for set in test train dev; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | local/reverse.py | \ + local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + done + local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 6 ]; then + echo "$0: Estimating a language model for decoding..." 
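
The BPE preparation in stage 5 above reverses each line for the right-to-left script (local/reverse.py, details omitted here), prepends '|' to every word so word boundaries can be recovered after decoding, learns and applies byte-pair encoding, and strips the '@@' continuation markers. A toy sketch of that text flow, with placeholder Latin words standing in for an Arabic transcript and a hand-picked example segmentation standing in for apply_bpe.py:

    # Toy illustration of the stage-5 pipe on one transcript line; this stands
    # in for prepend_words.py / apply_bpe.py rather than reproducing them.
    line = "abc defgh"
    marked = " ".join("|" + w for w in line.split())   # prepend_words.py: '|abc |defgh'
    bpe = "|abc |de@@ fgh"          # apply_bpe.py might split a rare word into subwords
    no_marker = bpe.replace("@@", "")                  # sed 's/@@//g' -> '|abc |de fgh'
    print(no_marker)   # the '|' symbols later let decoded subword units be re-joined into words
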
+ local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + data/local/dict/lexicon.txt data/lang_test +fi + +if [ $stage -le 7 ]; then + echo "$0: Calling the flat-start chain recipe..." + echo "Date: $(date)." + local/chain/run_flatstart_cnn1a.sh --nj $nj +fi + +if [ $stage -le 8 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + echo "Date: $(date)." + steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --use-gpu false \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +if [ $stage -le 9 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments..." + echo "Date: $(date)." + local/chain/run_cnn_e2eali_1b.sh --nj $nj +fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 5d27476d3e1..95af220fd3e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -81,12 +81,13 @@ fi if [ $stage -le 5 ]; then echo "$0: Preparing dictionary and lang..." cut -d' ' -f2- data/train/text | local/reverse.py | \ - local/prepend_words.py | \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | + utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text diff --git a/egs/wsj/s5/utils/lang/make_lexicon_fst.py b/egs/wsj/s5/utils/lang/make_lexicon_fst.py index 67ed0ac2789..89c50b2f069 100755 --- a/egs/wsj/s5/utils/lang/make_lexicon_fst.py +++ b/egs/wsj/s5/utils/lang/make_lexicon_fst.py @@ -72,7 +72,7 @@ def read_lexiconp(filename): with open(filename, 'r', encoding='latin-1') as f: whitespace = re.compile("[ \t]+") for line in f: - a = whitespace.split(line.strip()) + a = whitespace.split(line.rstrip('\n')) if len(a) < 2: print("{0}: error: found bad line '{1}' in lexicon file {2} ".format( sys.argv[0], line.strip(), filename), file=sys.stderr) From c3443d28ec1b0629dddb2ab02e276d981f451ff6 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 20:56:21 -0400 Subject: [PATCH 02/67] updating run_end2end for text localization --- egs/madcat_ar/v1/local/score.sh | 4 +- .../v1/local/tl/not_much_imp/run_end2end.sh | 96 +++++++++---------- .../v1/local/tl/not_much_imp/score.sh | 6 -- egs/madcat_ar/v1/run_end2end.sh | 71 ++++++-------- 4 files changed, 77 insertions(+), 100 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/score.sh diff --git a/egs/madcat_ar/v1/local/score.sh b/egs/madcat_ar/v1/local/score.sh index 2c11aba3e13..31564d25326 100755 --- a/egs/madcat_ar/v1/local/score.sh +++ b/egs/madcat_ar/v1/local/score.sh @@ -1,5 +1,5 @@ #!/bin/bash -steps/scoring/score_kaldi_wer.sh --word_ins_penalty 0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0 "$@" -steps/scoring/score_kaldi_cer.sh --stage 2 --word_ins_penalty 0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0 "$@" +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh 
b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh index 126d1d56c8f..e21bf7b73dc 100755 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh @@ -7,9 +7,7 @@ nj=30 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. +# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -23,18 +21,22 @@ data_splits_dir=data/download/data_splits . ./path.sh . ./utils/parse_options.sh # e.g. this parses the above options # if supplied. +./local/check_tools.sh + mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} - if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)." local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then - for dataset in dev train; do + for dataset in train dev; do data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ @@ -42,9 +44,7 @@ if [ $stage -le 1 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$dataset done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --images_scp_dir data/local \ @@ -52,74 +52,70 @@ if [ $stage -le 2 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 fi -if [ $stage -le 3 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames" - echo "Date: $(date)." - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - echo "$0: Obtaining image groups. calling get_allowed_lengths" - echo "Date: $(date)." +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train -fi -if [ $stage -le 4 ]; then - # for dataset in train dev; do - # echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " - # echo "Date: $(date)." - # local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - # steps/compute_cmvn_stats.sh data/$dataset || exit 1; - # done - # echo "$0: Fixing data directory for train dataset $(date)." 
- # utils/fix_data_dir.sh data/train + for dataset in dev train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset + steps/compute_cmvn_stats.sh data/$dataset || exit 1; + done + echo "$0: Fixing data directory for train dataset $(date)." + utils/fix_data_dir.sh data/train local/make_features.py data/test/images.scp --feat-dim 40 \ - --allowed_len_file_path data/test/allowed_lengths.txt --no-augment | \ + --allowed_len_file_path data/test/allowed_lengths.txt | \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:data/test/data/images.ark,data/test/feats.scp fi -if [ $stage -le 5 ]; then - echo "$0: Preparing dictionary and lang..." - cut -d' ' -f2- data/train/text | python3 local/reverse.py | python3 local/prepend_words.py | python3 utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out - for set in test train dev ; do +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." + cut -d' ' -f2- data/train/text | local/reverse.py | \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + + for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | python3 local/reverse.py | python3 local/prepend_words.py | python3 utils/lang/bpe/apply_bpe.py -c data/train/bpe.out | sed 's/@@//g' > data/$set/bpe_text + cut -d' ' -f2- data/$set/text | local/reverse.py | \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | + utils/lang/bpe/apply_bpe.py -c data/local/bpe.out \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 6 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi -if [ $stage -le 7 ]; then - echo "$0: Calling the flat-start chain recipe..." - echo "Date: $(date)." +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_flatstart_cnn1a.sh --nj $nj fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - echo "Date: $(date)." +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - echo "Date: $(date)." 
- local/chain/run_cnn_e2eali_1b.sh --nj $nj --stage 2 +if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" + local/chain/run_cnn_e2eali_1b.sh --nj $nj fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh deleted file mode 100755 index 1d84815fc69..00000000000 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/score.sh +++ /dev/null @@ -1,6 +0,0 @@ - -#!/bin/bash - - -steps/scoring/score_kaldi_wer.sh "$@" -steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 95af220fd3e..74753aaa45d 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -7,9 +7,7 @@ nj=70 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. +# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -27,15 +25,17 @@ data_splits_dir=data/download/data_splits mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} - if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then for dataset in test train dev; do data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ @@ -44,9 +44,7 @@ if [ $stage -le 1 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$dataset done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --images_scp_dir data/local \ @@ -54,76 +52,65 @@ if [ $stage -le 2 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 fi -if [ $stage -le 3 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames" - echo "Date: $(date)." - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - echo "$0: Obtaining image groups. calling get_allowed_lengths" - echo "Date: $(date)." +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." 
+ image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train -fi -if [ $stage -le 4 ]; then for dataset in test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " - echo "Date: $(date)." + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset steps/compute_cmvn_stats.sh data/$dataset || exit 1; done - echo "$0: Fixing data directory for train dataset" - echo "Date: $(date)." + echo "$0: Fixing data directory for train dataset $(date)." utils/fix_data_dir.sh data/train fi -if [ $stage -le 5 ]; then - echo "$0: Preparing dictionary and lang..." +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ utils/lang/bpe/prepend_words.py --encoding 'utf-8' | - utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.out \ | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 6 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi -if [ $stage -le 7 ]; then - echo "$0: Calling the flat-start chain recipe..." - echo "Date: $(date)." +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_flatstart_cnn1a.sh --nj $nj fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - echo "Date: $(date)." +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - echo "Date: $(date)." 
+if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" local/chain/run_cnn_e2eali_1b.sh --nj $nj fi From 9c6a923e89ad8da94c428bfd6af01e4e12ecfb8a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 22:30:34 -0400 Subject: [PATCH 03/67] adding higher language model --- egs/madcat_ar/v1/local/train_lm.sh | 42 +++++++++++++++------- egs/madcat_ar/v1/run_end2end.sh | 4 +-- egs/wsj/s5/utils/lang/bpe/prepend_words.py | 11 ++++-- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index 3b8a382cb00..85cb06480a3 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -6,20 +6,19 @@ # 2017 Hossein Hadian # Apache 2.0 # -# This script trains a LM on the MADCAT training transcriptions. +# This script trains a LM on the training transcriptions and corpus text. # It is based on the example scripts distributed with PocoLM # It will check if pocolm is installed and if not will proceed with installation set -e stage=0 - +dir=data/local/local_lm +order=6 echo "$0 $@" # Print the command line for logging . ./utils/parse_options.sh || exit 1; -dir=data/local/local_lm lm_dir=${dir}/data -segments=data/train/segmented_words mkdir -p $dir @@ -43,12 +42,10 @@ bypass_metaparam_optim_opt= # These example numbers of metaparameters is for 4-gram model (with min-counts) # running with train_lm.py. # The dev perplexity should be close to the non-bypassed model. -#bypass_metaparam_optim_opt= # Note: to use these example parameters, you may need to remove the .done files # to make sure the make_lm_dir.py be called and tain only 3-gram model #for order in 3; do #rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done - if [ $stage -le 0 ]; then mkdir -p ${dir}/data mkdir -p ${dir}/data/text @@ -65,7 +62,7 @@ if [ $stage -le 0 ]; then # use the training data as an additional data source. # we can later fold the dev data into this. - cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/madcat.txt + cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt # for reporting perplexities, we'll use the "real" dev set. # (the validation data is used as ${dir}/data/text/dev.txt to work @@ -75,12 +72,10 @@ if [ $stage -le 0 ]; then cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt # get the wordlist from MADCAT text - cat ${dir}/data/text/madcat.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist fi -order=3 - if [ $stage -le 1 ]; then # decide on the vocabulary. 
# Note: you'd use --wordlist if you had a previously determined word-list @@ -88,7 +83,7 @@ if [ $stage -le 1 ]; then # Note: if you have more than one order, use a certain amount of words as the # vocab and want to restrict max memory for 'sort', echo "$0: training the unpruned LM" - min_counts='train=2 madcat=1' + min_counts='train=1' wordlist=${dir}/data/wordlist lm_name="`basename ${wordlist}`_${order}" @@ -96,13 +91,34 @@ if [ $stage -le 1 ]; then lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" fi unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm - train_lm.py --wordlist=${wordlist} --num-splits=5 --warm-start-ratio=1 \ + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ --limit-unk-history=true \ ${bypass_metaparam_optim_opt} \ ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' - mkdir -p ${dir}/data/arpa format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 1 million n-grams for a big LM for rescoring purposes. + size=1000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi + +if [ $stage -le 3 ]; then + echo "$0: pruning the LM (to smaller size)" + # Using 500k n-grams for a smaller LM for graph building. Prune from the + # bigger-pruned LM, it'll be faster. + size=500000 + prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' + format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz +fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 74753aaa45d..717a629ae60 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -76,7 +76,7 @@ if [ $stage -le 2 ]; then cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ utils/lang/bpe/prepend_words.py --encoding 'utf-8' | - utils/lang/bpe/apply_bpe.py -c data/local/bpe.out \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old @@ -94,7 +94,7 @@ fi if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi diff --git a/egs/wsj/s5/utils/lang/bpe/prepend_words.py b/egs/wsj/s5/utils/lang/bpe/prepend_words.py index face771c7ca..d497344e850 100755 --- a/egs/wsj/s5/utils/lang/bpe/prepend_words.py +++ b/egs/wsj/s5/utils/lang/bpe/prepend_words.py @@ -4,11 +4,16 @@ # the beginning of the words for finding the initial-space of every word # after decoding. 
+import argparse import sys, io -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1') -output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1') +parser = argparse.ArgumentParser(description="Prepends '|' to the beginning of every word") +parser.add_argument('--encoding', type=str, default='latin-1', + help='Type of encoding') +args = parser.parse_args() + +infile = io.TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) +output = io.TextIOWrapper(sys.stdout.buffer, encoding=args.encoding) for line in infile: output.write(' '.join([ "|"+word for word in line.split()]) + '\n') - From 2c87fe5eecc60afd00a056e9f11acf55bc4bf54b Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 22:37:13 -0400 Subject: [PATCH 04/67] fixing bug --- egs/madcat_ar/v1/run_end2end.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 717a629ae60..3696284ed1e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -75,7 +75,7 @@ if [ $stage -le 2 ]; then for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text From 053fbdb09491c48b9bf72084b1a5bb8d41c9de26 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 22:50:58 -0400 Subject: [PATCH 05/67] minor fix --- egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh | 10 +++++----- egs/madcat_ar/v1/run_end2end.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh index e21bf7b73dc..01072c565bd 100755 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh @@ -32,7 +32,7 @@ if [ $stage -le 0 ]; then echo "Exiting with status 1 to avoid data corruption" exit 1; fi - echo "$0: Downloading data splits...$(date)." + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 @@ -80,13 +80,13 @@ if [ $stage -le 2 ]; then for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | - utils/lang/bpe/apply_bpe.py -c data/local/bpe.out \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - rm -f data/$set/bpe_text data/$set/ids + #rm -f data/$set/bpe_text data/$set/ids done echo "$0:Preparing dictionary and lang..." @@ -99,7 +99,7 @@ fi if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." 
local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 3696284ed1e..856ddb97f11 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -81,7 +81,7 @@ if [ $stage -le 2 ]; then mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - rm -f data/$set/bpe_text data/$set/ids + #rm -f data/$set/bpe_text data/$set/ids done echo "$0:Preparing dictionary and lang..." From 837fd4dfcd717d022dc9cab508571c47d385b796 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 14 Sep 2018 22:56:24 -0400 Subject: [PATCH 06/67] adding augmentation --- .../v1/local/tl/not_much_imp/run_end2end.sh | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh index 01072c565bd..1ff5b549180 100755 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh +++ b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh @@ -56,22 +56,25 @@ if [ $stage -le 1 ]; then echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - - for dataset in dev train; do + for set in dev test train; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - steps/compute_cmvn_stats.sh data/$dataset || exit 1; + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." - utils/fix_data_dir.sh data/train + image/fix_data_dir.sh data/train - local/make_features.py data/test/images.scp --feat-dim 40 \ - --allowed_len_file_path data/test/allowed_lengths.txt | \ - copy-feats --compress=true --compression-method=7 \ - ark:- ark,scp:data/test/data/images.ark,data/test/feats.scp fi if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 3 ]; then echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ From 9c1d5533a501f80532032984c70c4bca280453b6 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 15:50:37 -0400 Subject: [PATCH 07/67] updating parameters --- .../v1/local/chain/run_flatstart_cnn1b.sh | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh new file mode 100644 index 00000000000..71130edf244 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. 
from scratch) + +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# System e2e_cnn_1a +# WER 10.71 +# CER 2.85 +# Final train prob -0.0859 +# Final valid prob -0.1266 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 2.94M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ +# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) + +set -e + +# configs for 'chain' +stage=0 +nj=70 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=550 +minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 +common_egs_dir= +cmvn_opts="--norm-means=true --norm-vars=true" +train_set=train +lang_test=lang_test +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + --shared-phones true \ + --type mono \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 
height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. + + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial 6 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi + +echo "Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir From 47b6508c80f6cb0dfa3b74a4562843d877012a6a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 15:59:00 -0400 Subject: [PATCH 08/67] updating parameters --- .../v1/local/chain/run_flatstart_cnn1b.sh | 27 +++++++------------ egs/madcat_ar/v1/path.sh | 1 + 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh index 71130edf244..2374c2181c1 100644 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh @@ -23,7 +23,7 @@ stage=0 nj=70 train_stage=-10 get_egs_stage=-10 -affix=1a +affix=1b # training options tdnn_dim=550 @@ -94,20 +94,10 @@ if [ $stage -le 2 ]; then conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 + ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 @@ -123,15 +113,16 @@ if [ $stage -le 3 ]; then steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ --feat.cmvn-opts "$cmvn_opts" \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ + --chain.lm-opts="--ngram-order=2 
--no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ --trainer.frames-per-iter 2000000 \ diff --git a/egs/madcat_ar/v1/path.sh b/egs/madcat_ar/v1/path.sh index 2d17b17a84a..252d4ab04fe 100755 --- a/egs/madcat_ar/v1/path.sh +++ b/egs/madcat_ar/v1/path.sh @@ -3,4 +3,5 @@ export KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh +export CUDA_CACHE_DISABLE=1 export LC_ALL=C From 18f585e46a597651fa4323f7c561011ce5cf7384 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 16:58:16 -0400 Subject: [PATCH 09/67] updating parameters --- egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh | 1 + egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh index 4eea10a8441..67c76fdfd37 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh @@ -125,6 +125,7 @@ if [ $stage -le 3 ]; then --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ --trainer.frames-per-iter $frames_per_iter \ diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh old mode 100644 new mode 100755 index 2374c2181c1..901903a9bba --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh @@ -117,7 +117,6 @@ if [ $stage -le 3 ]; then --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ From cf22d16ecc0cf933101e8c61aaaeb198796d3af1 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 18:11:14 -0400 Subject: [PATCH 10/67] minor cleaning and higher order language model --- .../v1/local/chain/run_cnn_e2eali_1b.sh | 2 +- .../v1/local/chain/run_flatstart_cnn1a.sh | 3 +- .../create_line_image_from_page_image.py | 15 ++-- egs/madcat_ar/v1/local/process_data.py | 72 +++++++----------- egs/madcat_ar/v1/local/score.sh | 4 +- egs/madcat_ar/v1/local/train_lm.sh | 42 ++++++---- egs/madcat_ar/v1/run_end2end.sh | 76 ++++++++----------- 7 files changed, 103 insertions(+), 111 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh index 75c246f5ffe..55df0cad4b7 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh @@ -193,7 +193,7 @@ if [ $stage -le 5 ]; then 
--trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh index 2c85e982ce6..67c76fdfd37 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh @@ -33,7 +33,7 @@ num_jobs_final=16 minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 common_egs_dir= l2_regularize=0.00005 -frames_per_iter=1000000 +frames_per_iter=2000000 cmvn_opts="--norm-means=true --norm-vars=true" train_set=train lang_test=lang_test @@ -125,6 +125,7 @@ if [ $stage -le 3 ]; then --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ --trainer.frames-per-iter $frames_per_iter \ diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index ba35f8b9ace..a91fe55ed3e 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -60,6 +60,8 @@ help='Path to the downloaded (and extracted) writing conditions file 3') parser.add_argument('--padding', type=int, default=400, help='padding across horizontal/verticle direction') +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") args = parser.parse_args() """ @@ -535,13 +537,12 @@ def check_writing_condition(wc_dict, base_name): Returns (bool): True if writing condition matches. """ - return True - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False - - return True - + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True ### main ### diff --git a/egs/madcat_ar/v1/local/process_data.py b/egs/madcat_ar/v1/local/process_data.py index b57500cf2fa..920cb6f700b 100755 --- a/egs/madcat_ar/v1/local/process_data.py +++ b/egs/madcat_ar/v1/local/process_data.py @@ -42,6 +42,8 @@ help='Path to the downloaded (and extracted) writing conditions file 2') parser.add_argument('writing_condition3', type=str, help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") args = parser.parse_args() @@ -97,50 +99,40 @@ def check_writing_condition(wc_dict): Returns: (bool): True if writing condition matches. """ - return True - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True - return True - -def get_word_line_mapping(madcat_file_path): +def read_text(madcat_file_path): """ Maps every word in the page image to a corresponding line. 
Args: - madcat_file_path (string): complete path and name of the madcat xml file + madcat_file_path (string): complete path and name of the madcat xml file corresponding to the page image. Returns: + dict: Mapping every word in the page image to a corresponding line. """ + + word_line_dict = dict() doc = minidom.parse(madcat_file_path) zone = doc.getElementsByTagName('zone') for node in zone: line_id = node.getAttribute('id') - line_word_dict[line_id] = list() word_image = node.getElementsByTagName('token-image') for tnode in word_image: word_id = tnode.getAttribute('id') - line_word_dict[line_id].append(word_id) word_line_dict[word_id] = line_id - -def read_text(madcat_file_path): - """ Maps every word in the page image to a corresponding line. - Args: - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - Returns: - dict: Mapping every word in the page image to a corresponding line. - """ text_line_word_dict = dict() - doc = minidom.parse(madcat_file_path) segment = doc.getElementsByTagName('segment') for node in segment: token = node.getElementsByTagName('token') for tnode in token: ref_word_id = tnode.getAttribute('ref_id') word = tnode.getElementsByTagName('source')[0].firstChild.nodeValue - word = unicodedata.normalize('NFKC',word) ref_line_id = word_line_dict[ref_word_id] if ref_line_id not in text_line_word_dict: text_line_word_dict[ref_line_id] = list() @@ -160,7 +152,6 @@ def get_line_image_location(): ### main ### - print("Processing '{}' data...".format(args.out_dir)) text_file = os.path.join(args.out_dir, 'text') @@ -188,24 +179,19 @@ def get_line_image_location(): madcat_xml_path, image_file_path, wc_dict = check_file_location() if wc_dict is None or not check_writing_condition(wc_dict): continue - if madcat_xml_path is not None: - madcat_doc = minidom.parse(madcat_xml_path) - writer = madcat_doc.getElementsByTagName('writer') - writer_id = writer[0].getAttribute('id') - line_word_dict = dict() - word_line_dict = dict() - get_word_line_mapping(madcat_xml_path) - text_line_word_dict = read_text(madcat_xml_path) - base_name = os.path.basename(image_file_path) - base_name, b = base_name.split('.tif') - for lineID in sorted(text_line_word_dict): - updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' - location = image_loc_dict[updated_base_name] - image_file_path = os.path.join(location, updated_base_name) - line = text_line_word_dict[lineID] - text = ' '.join(line) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) - text_fh.write(utt_id + ' ' + text + '\n') - utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') - image_fh.write(utt_id + ' ' + image_file_path + '\n') - image_num += 1 + madcat_doc = minidom.parse(madcat_xml_path) + writer = madcat_doc.getElementsByTagName('writer') + writer_id = writer[0].getAttribute('id') + text_line_word_dict = read_text(madcat_xml_path) + base_name = os.path.basename(image_file_path).split('.tif')[0] + for lineID in sorted(text_line_word_dict): + updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' + location = image_loc_dict[updated_base_name] + image_file_path = os.path.join(location, updated_base_name) + line = text_line_word_dict[lineID] + text = ' '.join(line) + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + 
image_num += 1 diff --git a/egs/madcat_ar/v1/local/score.sh b/egs/madcat_ar/v1/local/score.sh index 2c11aba3e13..31564d25326 100755 --- a/egs/madcat_ar/v1/local/score.sh +++ b/egs/madcat_ar/v1/local/score.sh @@ -1,5 +1,5 @@ #!/bin/bash -steps/scoring/score_kaldi_wer.sh --word_ins_penalty 0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0 "$@" -steps/scoring/score_kaldi_cer.sh --stage 2 --word_ins_penalty 0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0 "$@" +steps/scoring/score_kaldi_wer.sh "$@" +steps/scoring/score_kaldi_cer.sh --stage 2 "$@" diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index 3b8a382cb00..85cb06480a3 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -6,20 +6,19 @@ # 2017 Hossein Hadian # Apache 2.0 # -# This script trains a LM on the MADCAT training transcriptions. +# This script trains a LM on the training transcriptions and corpus text. # It is based on the example scripts distributed with PocoLM # It will check if pocolm is installed and if not will proceed with installation set -e stage=0 - +dir=data/local/local_lm +order=6 echo "$0 $@" # Print the command line for logging . ./utils/parse_options.sh || exit 1; -dir=data/local/local_lm lm_dir=${dir}/data -segments=data/train/segmented_words mkdir -p $dir @@ -43,12 +42,10 @@ bypass_metaparam_optim_opt= # These example numbers of metaparameters is for 4-gram model (with min-counts) # running with train_lm.py. # The dev perplexity should be close to the non-bypassed model. -#bypass_metaparam_optim_opt= # Note: to use these example parameters, you may need to remove the .done files # to make sure the make_lm_dir.py be called and tain only 3-gram model #for order in 3; do #rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done - if [ $stage -le 0 ]; then mkdir -p ${dir}/data mkdir -p ${dir}/data/text @@ -65,7 +62,7 @@ if [ $stage -le 0 ]; then # use the training data as an additional data source. # we can later fold the dev data into this. - cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/madcat.txt + cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt # for reporting perplexities, we'll use the "real" dev set. # (the validation data is used as ${dir}/data/text/dev.txt to work @@ -75,12 +72,10 @@ if [ $stage -le 0 ]; then cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt # get the wordlist from MADCAT text - cat ${dir}/data/text/madcat.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist fi -order=3 - if [ $stage -le 1 ]; then # decide on the vocabulary. 
# Note: you'd use --wordlist if you had a previously determined word-list @@ -88,7 +83,7 @@ if [ $stage -le 1 ]; then # Note: if you have more than one order, use a certain amount of words as the # vocab and want to restrict max memory for 'sort', echo "$0: training the unpruned LM" - min_counts='train=2 madcat=1' + min_counts='train=1' wordlist=${dir}/data/wordlist lm_name="`basename ${wordlist}`_${order}" @@ -96,13 +91,34 @@ if [ $stage -le 1 ]; then lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" fi unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm - train_lm.py --wordlist=${wordlist} --num-splits=5 --warm-start-ratio=1 \ + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ --limit-unk-history=true \ ${bypass_metaparam_optim_opt} \ ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' - mkdir -p ${dir}/data/arpa format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz fi + +if [ $stage -le 2 ]; then + echo "$0: pruning the LM (to larger size)" + # Using 1 million n-grams for a big LM for rescoring purposes. + size=1000000 + prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz +fi + +if [ $stage -le 3 ]; then + echo "$0: pruning the LM (to smaller size)" + # Using 500k n-grams for a smaller LM for graph building. Prune from the + # bigger-pruned LM, it'll be faster. + size=500000 + prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small + + get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' + format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz +fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 5d27476d3e1..3696284ed1e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -7,9 +7,7 @@ nj=70 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. +# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -27,15 +25,17 @@ data_splits_dir=data/download/data_splits mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} - if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! 
$overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then for dataset in test train dev; do data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ @@ -44,9 +44,7 @@ if [ $stage -le 1 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$dataset done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --images_scp_dir data/local \ @@ -54,75 +52,65 @@ if [ $stage -le 2 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 fi -if [ $stage -le 3 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames" - echo "Date: $(date)." - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - echo "$0: Obtaining image groups. calling get_allowed_lengths" - echo "Date: $(date)." +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train -fi -if [ $stage -le 4 ]; then for dataset in test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " - echo "Date: $(date)." + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset steps/compute_cmvn_stats.sh data/$dataset || exit 1; done - echo "$0: Fixing data directory for train dataset" - echo "Date: $(date)." + echo "$0: Fixing data directory for train dataset $(date)." utils/fix_data_dir.sh data/train fi -if [ $stage -le 5 ]; then - echo "$0: Preparing dictionary and lang..." +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ - local/prepend_words.py | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. 
- # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 6 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi -if [ $stage -le 7 ]; then - echo "$0: Calling the flat-start chain recipe..." - echo "Date: $(date)." +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_flatstart_cnn1a.sh --nj $nj fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - echo "Date: $(date)." +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - echo "Date: $(date)." +if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" local/chain/run_cnn_e2eali_1b.sh --nj $nj fi From 95aed1005b58f8adcff45f988947bfffabd09bbb Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 18:16:33 -0400 Subject: [PATCH 11/67] updating results --- egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh | 10 +++++----- egs/madcat_ar/v1/local/train_lm.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh index 67c76fdfd37..033cb88df10 100755 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh @@ -5,16 +5,16 @@ # local/chain/compare_wer.sh exp/chain/e2e_cnn_1a # System e2e_cnn_1a -# WER 10.71 -# CER 2.85 -# Final train prob -0.0859 -# Final valid prob -0.1266 +# WER 7.81 +# CER 2.05 +# Final train prob -0.0812 +# Final valid prob -0.0708 # Final train prob (xent) # Final valid prob (xent) # Parameters 2.94M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) +# exp/chain/e2e_cnn_1a/: num-iters=98 nj=6..16 num-params=2.9M dim=40->330 combine=-0.073->-0.073 (over 2) logprob:train/valid[64,97,final]=(-0.084,-0.080,-0.081/-0.073,-0.070,-0.071) set -e diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index 85cb06480a3..b7fc0b09a46 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -6,7 +6,7 @@ # 2017 Hossein Hadian # Apache 2.0 # -# This script trains a LM on the training transcriptions and corpus text. +# This script trains a LM on the training transcriptions. 
# It is based on the example scripts distributed with PocoLM # It will check if pocolm is installed and if not will proceed with installation From 85e3649535912d68ebadeae7126276edda0c4cb0 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 18:26:11 -0400 Subject: [PATCH 12/67] minor fix and adding tuning directory --- egs/madcat_ar/v1/local/chain/run_cnn.sh | 1 + egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh | 1 + egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh | 1 + egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh | 1 + egs/madcat_ar/v1/local/chain/{ => tuning}/run_cnn_1a.sh | 0 .../v1/local/chain/{ => tuning}/run_cnn_chainali_1a.sh | 0 .../v1/local/chain/{ => tuning}/run_cnn_e2eali_1a.sh | 0 .../v1/local/chain/{ => tuning}/run_cnn_e2eali_1b.sh | 0 .../{run_flatstart_cnn1a.sh => tuning/run_e2e_cnn_1a.sh} | 0 egs/madcat_ar/v1/run.sh | 4 ++-- egs/madcat_ar/v1/run_end2end.sh | 4 ++-- 11 files changed, 8 insertions(+), 4 deletions(-) create mode 120000 egs/madcat_ar/v1/local/chain/run_cnn.sh create mode 120000 egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh create mode 120000 egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh create mode 120000 egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh rename egs/madcat_ar/v1/local/chain/{ => tuning}/run_cnn_1a.sh (100%) rename egs/madcat_ar/v1/local/chain/{ => tuning}/run_cnn_chainali_1a.sh (100%) rename egs/madcat_ar/v1/local/chain/{ => tuning}/run_cnn_e2eali_1a.sh (100%) rename egs/madcat_ar/v1/local/chain/{ => tuning}/run_cnn_e2eali_1b.sh (100%) rename egs/madcat_ar/v1/local/chain/{run_flatstart_cnn1a.sh => tuning/run_e2e_cnn_1a.sh} (100%) diff --git a/egs/madcat_ar/v1/local/chain/run_cnn.sh b/egs/madcat_ar/v1/local/chain/run_cnn.sh new file mode 120000 index 00000000000..df6f0a468c1 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn.sh @@ -0,0 +1 @@ +tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh new file mode 120000 index 00000000000..a864819f542 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali.sh @@ -0,0 +1 @@ +tuning/run_cnn_chainali_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..fcf59f917c1 --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1b.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..d26ba0182ce --- /dev/null +++ b/egs/madcat_ar/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh similarity index 100% rename from egs/madcat_ar/v1/local/chain/run_cnn_1a.sh rename to egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh similarity index 100% rename from egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh rename to egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh similarity index 100% rename from egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh rename to 
egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh similarity index 100% rename from egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh rename to egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh similarity index 100% rename from egs/madcat_ar/v1/local/chain/run_flatstart_cnn1a.sh rename to egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index 14c8bf7a6ce..06a16bf157a 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -132,9 +132,9 @@ if [ $stage -le 12 ]; then fi if [ $stage -le 13 ]; then - local/chain/run_cnn_1a.sh + local/chain/run_cnn.sh fi if [ $stage -le 14 ]; then - local/chain/run_cnn_chainali_1a.sh --stage 2 + local/chain/run_cnn_chainali.sh --stage 2 fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 3696284ed1e..0a57676fdbf 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -100,7 +100,7 @@ fi if [ $stage -le 4 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_flatstart_cnn1a.sh --nj $nj + local/chain/run_e2e_cnn.sh --nj $nj fi if [ $stage -le 5 ]; then @@ -112,5 +112,5 @@ fi if [ $stage -le 6 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali_1b.sh --nj $nj + local/chain/run_cnn_e2eali.sh --nj $nj fi From bff652cc88c46741fb7e14be429b79a77834d812 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 18:50:24 -0400 Subject: [PATCH 13/67] adding overwrite variable --- egs/madcat_ar/v1/local/extract_features.sh | 4 ++++ egs/madcat_ar/v1/run_end2end.sh | 1 + 2 files changed, 5 insertions(+) diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 70c5498626c..56a8443e328 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -1,7 +1,11 @@ #!/bin/bash + # Copyright 2017 Yiwen Shao # 2018 Ashish Arora +# Apache 2.0 +# This script runs the make features script in parallel. + nj=4 cmd=run.pl feat_dim=40 diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 0a57676fdbf..e5ca540d3c1 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -15,6 +15,7 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits +overwrite=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. 
From 303246ee867a2414b755f76a52e7406a98f1f7b3 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 18:53:56 -0400 Subject: [PATCH 14/67] adding documentation, fixing run.sh, minor fix --- egs/madcat_ar/v1/run.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index 06a16bf157a..f6a63320497 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -11,9 +11,7 @@ decode_gmm=false # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. +# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -21,7 +19,7 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits - +overwrite=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -34,8 +32,14 @@ mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 fi @@ -79,7 +83,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Estimating a language model for decoding..." 
local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi From 6b857dec87a33652921835bbf6e64fba125799b7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 22:04:38 -0400 Subject: [PATCH 15/67] adding text localization changes --- egs/madcat_ar/v1/local/tl/augment_data.sh | 34 ++ ...eate_line_image_from_page_image.py.augment | 530 ++++++++++++++++++ egs/madcat_ar/v1/local/tl/make_features.py | 170 ++++++ egs/madcat_ar/v1/local/tl/prepare_data.sh | 49 ++ egs/madcat_ar/v1/local/tl/process_data.py | 215 +++++++ .../v1/local/tl/process_waldo_data.py | 62 ++ egs/madcat_ar/v1/local/tl/run_end2end.sh | 124 ++++ .../v1/local/tl/run_textlocalization.sh | 128 +++++ 8 files changed, 1312 insertions(+) create mode 100755 egs/madcat_ar/v1/local/tl/augment_data.sh create mode 100755 egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment create mode 100755 egs/madcat_ar/v1/local/tl/make_features.py create mode 100755 egs/madcat_ar/v1/local/tl/prepare_data.sh create mode 100755 egs/madcat_ar/v1/local/tl/process_data.py create mode 100755 egs/madcat_ar/v1/local/tl/process_waldo_data.py create mode 100755 egs/madcat_ar/v1/local/tl/run_end2end.sh create mode 100755 egs/madcat_ar/v1/local/tl/run_textlocalization.sh diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh new file mode 100755 index 00000000000..31e4a8217ca --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/augment_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 +aug_set=aug1 +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" + +for set in $aug_set; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --fliplr false --augment true $datadir/augmentations/$set +done + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment new file mode 100755 index 00000000000..faf0d3503c7 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment @@ -0,0 +1,530 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora +# Apache 2.0 +# minimum bounding box part in this script is originally from +#https://github.com/BebeSparkelSparkel/MinimumBoundingBox +#https://startupnextdoor.com/computing-convex-hull-in-python/ +""" This module will be used for extracting line images from page image. + Given the word segmentation (bounding box around a word) for every word, it will + extract line segmentation. 
To extract line segmentation, it will take word bounding + boxes of a line as input, will create a minimum area bounding box that will contain + all corner points of word bounding boxes. The obtained bounding box (will not necessarily + be vertically or horizontally aligned). Hence to extract line image from line bounding box, + page image is rotated and line image is cropped and saved. +""" + +import sys +import argparse +import os +import xml.dom.minidom as minidom +import numpy as np +from math import atan2, cos, sin, pi, degrees, sqrt +from collections import namedtuple +import random +from scipy.spatial import ConvexHull +from PIL import Image +from scipy.misc import toimage +parser = argparse.ArgumentParser(description="Creates line images from page image", + epilog="E.g. " + sys.argv[0] + " data/LDC2012T15" + " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " + " data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('database_path1', type=str, + help='Path to the downloaded madcat data directory 1') +parser.add_argument('database_path2', type=str, + help='Path to the downloaded madcat data directory 2') +parser.add_argument('database_path3', type=str, + help='Path to the downloaded madcat data directory 3') +parser.add_argument('data_splits', type=str, + help='Path to file that contains the train/test/dev split information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files') +parser.add_argument('writing_condition1', type=str, + help='Path to the downloaded (and extracted) writing conditions file 1') +parser.add_argument('writing_condition2', type=str, + help='Path to the downloaded (and extracted) writing conditions file 2') +parser.add_argument('writing_condition3', type=str, + help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument('--padding', type=int, default=400, + help='padding across horizontal/verticle direction') +parser.add_argument('--pixel-scaling', type=int, default=30, + help='padding across horizontal/verticle direction') +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") +args = parser.parse_args() + +""" +bounding_box is a named tuple which contains: + area (float): area of the rectangle + length_parallel (float): length of the side that is parallel to unit_vector + length_orthogonal (float): length of the side that is orthogonal to unit_vector + rectangle_center(int, int): coordinates of the rectangle center + (use rectangle_corners to get the corner points of the rectangle) + unit_vector (float, float): direction of the length_parallel side. + (it's orthogonal vector can be found with the orthogonal_vector function + unit_vector_angle (float): angle of the unit vector to be in radians. + corner_points [(float, float)]: set that contains the corners of the rectangle +""" + +bounding_box_tuple = namedtuple('bounding_box_tuple', 'area ' + 'length_parallel ' + 'length_orthogonal ' + 'rectangle_center ' + 'unit_vector ' + 'unit_vector_angle ' + 'corner_points' + ) + + +def unit_vector(pt0, pt1): + """ Given two points pt0 and pt1, return a unit vector that + points in the direction of pt0 to pt1. 
+ Returns + ------- + (float, float): unit vector + """ + dis_0_to_1 = sqrt((pt0[0] - pt1[0])**2 + (pt0[1] - pt1[1])**2) + return (pt1[0] - pt0[0]) / dis_0_to_1, \ + (pt1[1] - pt0[1]) / dis_0_to_1 + + +def orthogonal_vector(vector): + """ Given a vector, returns a orthogonal/perpendicular vector of equal length. + Returns + ------ + (float, float): A vector that points in the direction orthogonal to vector. + """ + return -1 * vector[1], vector[0] + + +def bounding_area(index, hull): + """ Given index location in an array and convex hull, it gets two points + hull[index] and hull[index+1]. From these two points, it returns a named + tuple that mainly contains area of the box that bounds the hull. This + bounding box orintation is same as the orientation of the lines formed + by the point hull[index] and hull[index+1]. + Returns + ------- + a named tuple that contains: + area: area of the rectangle + length_parallel: length of the side that is parallel to unit_vector + length_orthogonal: length of the side that is orthogonal to unit_vector + rectangle_center: coordinates of the rectangle center + unit_vector: direction of the length_parallel side. + (it's orthogonal vector can be found with the orthogonal_vector function) + """ + unit_vector_p = unit_vector(hull[index], hull[index+1]) + unit_vector_o = orthogonal_vector(unit_vector_p) + + dis_p = tuple(np.dot(unit_vector_p, pt) for pt in hull) + dis_o = tuple(np.dot(unit_vector_o, pt) for pt in hull) + + min_p = min(dis_p) + min_o = min(dis_o) + len_p = max(dis_p) - min_p + len_o = max(dis_o) - min_o + + return {'area': len_p * len_o, + 'length_parallel': len_p, + 'length_orthogonal': len_o, + 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), + 'unit_vector': unit_vector_p, + } + + +def to_xy_coordinates(unit_vector_angle, point): + """ Given angle from horizontal axis and a point from origin, + returns converted unit vector coordinates in x, y coordinates. + angle of unit vector should be in radians. + Returns + ------ + (float, float): converted x,y coordinate of the unit vector. + """ + angle_orthogonal = unit_vector_angle + pi / 2 + return point[0] * cos(unit_vector_angle) + point[1] * cos(angle_orthogonal), \ + point[0] * sin(unit_vector_angle) + point[1] * sin(angle_orthogonal) + + +def rotate_points(center_of_rotation, angle, points): + """ Rotates a point cloud around the center_of_rotation point by angle + input + ----- + center_of_rotation (float, float): angle of unit vector to be in radians. + angle (float): angle of rotation to be in radians. + points [(float, float)]: Points to be a list or tuple of points. Points to be rotated. + Returns + ------ + [(float, float)]: Rotated points around center of rotation by angle + """ + rot_points = [] + ang = [] + for pt in points: + diff = tuple([pt[d] - center_of_rotation[d] for d in range(2)]) + diff_angle = atan2(diff[1], diff[0]) + angle + ang.append(diff_angle) + diff_length = sqrt(sum([d**2 for d in diff])) + rot_points.append((center_of_rotation[0] + diff_length * cos(diff_angle), + center_of_rotation[1] + diff_length * sin(diff_angle))) + + return rot_points + + +def rectangle_corners(rectangle): + """ Given rectangle center and its inclination, returns the corner + locations of the rectangle. + Returns + ------ + [(float, float)]: 4 corner points of rectangle. 
+ """ + corner_points = [] + for i1 in (.5, -.5): + for i2 in (i1, -1 * i1): + corner_points.append((rectangle['rectangle_center'][0] + i1 * rectangle['length_parallel'], + rectangle['rectangle_center'][1] + i2 * rectangle['length_orthogonal'])) + + return rotate_points(rectangle['rectangle_center'], rectangle['unit_vector_angle'], corner_points) + + +def minimum_bounding_box(points): + """ Given a list of 2D points, it returns the minimum area rectangle bounding all + the points in the point cloud. + Returns + ------ + returns a namedtuple that contains: + area: area of the rectangle + length_parallel: length of the side that is parallel to unit_vector + length_orthogonal: length of the side that is orthogonal to unit_vector + rectangle_center: coordinates of the rectangle center + unit_vector: direction of the length_parallel side. RADIANS + unit_vector_angle: angle of the unit vector + corner_points: set that contains the corners of the rectangle + """ + + if len(points) <= 2: raise ValueError('More than two points required.') + + hull_ordered = [points[index] for index in ConvexHull(points).vertices] + hull_ordered.append(hull_ordered[0]) + hull_ordered = tuple(hull_ordered) + + min_rectangle = bounding_area(0, hull_ordered) + for i in range(1, len(hull_ordered)-1): + rectangle = bounding_area(i, hull_ordered) + if rectangle['area'] < min_rectangle['area']: + min_rectangle = rectangle + + min_rectangle['unit_vector_angle'] = atan2(min_rectangle['unit_vector'][1], min_rectangle['unit_vector'][0]) + min_rectangle['rectangle_center'] = to_xy_coordinates(min_rectangle['unit_vector_angle'], min_rectangle['rectangle_center']) + + return bounding_box_tuple( + area = min_rectangle['area'], + length_parallel = min_rectangle['length_parallel'], + length_orthogonal = min_rectangle['length_orthogonal'], + rectangle_center = min_rectangle['rectangle_center'], + unit_vector = min_rectangle['unit_vector'], + unit_vector_angle = min_rectangle['unit_vector_angle'], + corner_points = set(rectangle_corners(min_rectangle)) + ) + + +def get_center(im): + """ Given image, returns the location of center pixel + Returns + ------- + (int, int): center of the image + """ + center_x = im.size[0] / 2 + center_y = im.size[1] / 2 + return int(center_x), int(center_y) + + +def get_horizontal_angle(unit_vector_angle): + """ Given an angle in radians, returns angle of the unit vector in + first or fourth quadrant. + Returns + ------ + (float): updated angle of the unit vector to be in radians. + It is only in first or fourth quadrant. + """ + if unit_vector_angle > pi / 2 and unit_vector_angle <= pi: + unit_vector_angle = unit_vector_angle - pi + elif unit_vector_angle > -pi and unit_vector_angle < -pi / 2: + unit_vector_angle = unit_vector_angle + pi + + return unit_vector_angle + + +def get_smaller_angle(bounding_box): + """ Given a rectangle, returns its smallest absolute angle from horizontal axis. + Returns + ------ + (float): smallest angle of the rectangle to be in radians. 
+ """ + unit_vector = bounding_box.unit_vector + unit_vector_angle = bounding_box.unit_vector_angle + ortho_vector = orthogonal_vector(unit_vector) + ortho_vector_angle = atan2(ortho_vector[1], ortho_vector[0]) + + unit_vector_angle_updated = get_horizontal_angle(unit_vector_angle) + ortho_vector_angle_updated = get_horizontal_angle(ortho_vector_angle) + + if abs(unit_vector_angle_updated) < abs(ortho_vector_angle_updated): + return unit_vector_angle_updated + else: + return ortho_vector_angle_updated + + +def rotated_points(bounding_box, center): + """ Given the rectangle, returns corner points of rotated rectangle. + It rotates the rectangle around the center by its smallest angle. + Returns + ------- + [(int, int)]: 4 corner points of rectangle. + """ + p1, p2, p3, p4 = bounding_box.corner_points + x1, y1 = p1 + x2, y2 = p2 + x3, y3 = p3 + x4, y4 = p4 + center_x, center_y = center + rotation_angle_in_rad = -get_smaller_angle(bounding_box) + x_dash_1 = (x1 - center_x) * cos(rotation_angle_in_rad) - (y1 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_2 = (x2 - center_x) * cos(rotation_angle_in_rad) - (y2 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_3 = (x3 - center_x) * cos(rotation_angle_in_rad) - (y3 - center_y) * sin(rotation_angle_in_rad) + center_x + x_dash_4 = (x4 - center_x) * cos(rotation_angle_in_rad) - (y4 - center_y) * sin(rotation_angle_in_rad) + center_x + + y_dash_1 = (y1 - center_y) * cos(rotation_angle_in_rad) + (x1 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_2 = (y2 - center_y) * cos(rotation_angle_in_rad) + (x2 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_3 = (y3 - center_y) * cos(rotation_angle_in_rad) + (x3 - center_x) * sin(rotation_angle_in_rad) + center_y + y_dash_4 = (y4 - center_y) * cos(rotation_angle_in_rad) + (x4 - center_x) * sin(rotation_angle_in_rad) + center_y + return x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 + + +def pad_image(image): + """ Given an image, returns a padded image around the border. + This routine save the code from crashing if bounding boxes that are + slightly outside the page boundary. + Returns + ------- + image: page image + """ + offset = int(args.padding // 2) + padded_image = Image.new('RGB', (image.size[0] + int(args.padding), image.size[1] + int(args.padding)), "white") + padded_image.paste(im = image, box = (offset, offset)) + return padded_image + + +def update_minimum_bounding_box_input(bounding_box_input): + """ Given list of 2D points, returns list of 2D points shifted by an offset. + Returns + ------ + points [(float, float)]: points, a list or tuple of 2D coordinates + """ + updated_minimum_bounding_box_input = [] + offset = int(args.padding // 2) + for point in bounding_box_input: + x, y = point + new_x = x + offset + new_y = y + offset + word_coordinate = (new_x, new_y) + updated_minimum_bounding_box_input.append(word_coordinate) + + return updated_minimum_bounding_box_input + + +def dilate_polygon(points, amount_increase): + """ Increases size of polygon given as a list of tuples. 
+ Assumes points in polygon are given in CCW + """ + expanded_points = [] + for index, point in enumerate(points): + prev_point = points[(index - 1) % len(points)] + next_point = points[(index + 1) % len(points)] + prev_edge = np.subtract(point, prev_point) + next_edge = np.subtract(next_point, point) + + prev_normal = ((1 * prev_edge[1]), (-1 * prev_edge[0])) + prev_normal = np.divide(prev_normal, np.linalg.norm(prev_normal)) + next_normal = ((1 * next_edge[1]), (-1 * next_edge[0])) + next_normal = np.divide(next_normal, np.linalg.norm(next_normal)) + + bisect = np.add(prev_normal, next_normal) + bisect = np.divide(bisect, np.linalg.norm(bisect)) + + cos_theta = np.dot(next_normal, bisect) + hyp = amount_increase / cos_theta + + new_point = np.around(point + hyp * bisect) + new_point = new_point.astype(int) + new_point = new_point.tolist() + new_point = tuple(new_point) + expanded_points.append(new_point) + return expanded_points + + +def set_line_image_data(image, line_id, image_file_name, image_fh): + """ Given an image, saves a flipped line image. Line image file name + is formed by appending the line id at the end page image name. + """ + + base_name = os.path.splitext(os.path.basename(image_file_name))[0] + line_id = '_' + line_id.zfill(4) + line_image_file_name = base_name + line_id + '.png' + image_path = os.path.join(args.out_dir, line_image_file_name) + imgray = image.convert('L') + imgray_rev_arr = np.fliplr(imgray) + imgray_rev = toimage(imgray_rev_arr) + imgray_rev.save(image_path) + image_fh.write(image_path + '\n') + + +def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh): + """ Given a page image, extracts the line images from it. + Input + ----- + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. 
+ """ + im_wo_pad = Image.open(image_file_name) + im = pad_image(im_wo_pad) + doc = minidom.parse(madcat_file_path) + zone = doc.getElementsByTagName('zone') + for node in zone: + id = node.getAttribute('id') + token_image = node.getElementsByTagName('token-image') + minimum_bounding_box_input = [] + for token_node in token_image: + word_point = token_node.getElementsByTagName('point') + for word_node in word_point: + word_coordinate = (int(word_node.getAttribute('x')), int(word_node.getAttribute('y'))) + minimum_bounding_box_input.append(word_coordinate) + updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) + points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] + for i in range(0, 3): + additional_pixel = random.randint(1, args.pixel_scaling) + mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) + bounding_box = minimum_bounding_box(mar) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + cropped_bounding_box, get_center(region_initial)) + + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + line_id = id + '_scale' + str(i) + set_line_image_data(region_final, line_id, image_file_name, image_fh) + + +def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): + """ Returns the complete path of the page image and corresponding + xml file. + Returns + ------- + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. 
+ """ + madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') + madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') + madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') + + image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') + image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') + image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') + + if os.path.exists(madcat_file_path1): + return madcat_file_path1, image_file_path1, wc_dict1 + + if os.path.exists(madcat_file_path2): + return madcat_file_path2, image_file_path2, wc_dict2 + + if os.path.exists(madcat_file_path3): + return madcat_file_path3, image_file_path3, wc_dict3 + + return None, None, None + + +def parse_writing_conditions(writing_conditions): + """ Given writing condition file path, returns a dictionary which have writing condition + of each page image. + Returns + ------ + (dict): dictionary with key as page image name and value as writing condition. + """ + with open(writing_conditions) as f: + file_writing_cond = dict() + for line in f: + line_list = line.strip().split("\t") + file_writing_cond[line_list[0]] = line_list[3] + return file_writing_cond + + +def check_writing_condition(wc_dict, base_name): + """ Given writing condition dictionary, checks if a page image is writing + in a specifed writing condition. + It is used to create subset of dataset based on writing condition. + Returns + (bool): True if writing condition matches. + """ + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True + +### main ### + +def main(): + + wc_dict1 = parse_writing_conditions(args.writing_condition1) + wc_dict2 = parse_writing_conditions(args.writing_condition2) + wc_dict3 = parse_writing_conditions(args.writing_condition3) + output_directory = args.out_dir + image_file = os.path.join(output_directory, 'images.scp') + image_fh = open(image_file, 'w', encoding='utf-8') + + splits_handle = open(args.data_splits, 'r') + splits_data = splits_handle.read().strip().split('\n') + prev_base_name = '' + for line in splits_data: + base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0] + if prev_base_name != base_name: + prev_base_name = base_name + madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) + if wc_dict is None or not check_writing_condition(wc_dict, base_name): + continue + if madcat_file_path is not None: + get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) + + +if __name__ == '__main__': + main() + diff --git a/egs/madcat_ar/v1/local/tl/make_features.py b/egs/madcat_ar/v1/local/tl/make_features.py new file mode 100755 index 00000000000..e9d10ecc87e --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/make_features.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2018 Hossein Hadian + +""" This script converts images to Kaldi-format feature matrices. The input to + this script is the path to a data directory, e.g. "data/train". This script + reads the images listed in images.scp and writes them to standard output + (by default) as Kaldi-formatted matrices (in text form). It also scales the + images so they have the same height (via --feat-dim). 
It can optionally pad + the images (on left/right sides) with white pixels. + If an 'image2num_frames' file is found in the data dir, it will be used + to enforce the images to have the specified length in that file by padding + white pixels (the --padding option will be ignored in this case). This relates + to end2end chain training. + + eg. local/make_features.py data/train --feat-dim 40 +""" +import random +import argparse +import os +import sys +import numpy as np +from scipy import misc +import math + +parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and + writes them to standard output in text format.""") +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') +parser.add_argument('--out-ark', type=str, default='-', + help='Where to write the output feature file') +parser.add_argument('--feat-dim', type=int, default=40, + help='Size to scale the height of all images') +parser.add_argument('--padding', type=int, default=5, + help='Number of white pixels to pad on the left' + 'and right side of the image.') +parser.add_argument('--vertical-shift', type=int, default=16, + help='total number of padding pixel per column') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +args = parser.parse_args() + + +def write_kaldi_matrix(file_handle, matrix, key): + file_handle.write(key + " [ ") + num_rows = len(matrix) + if num_rows == 0: + raise Exception("Matrix is empty") + num_cols = len(matrix[0]) + + for row_index in range(len(matrix)): + if num_cols != len(matrix[row_index]): + raise Exception("All the rows of a matrix are expected to " + "have the same length") + file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + if row_index != num_rows - 1: + file_handle.write("\n") + file_handle.write(" ]\n") + + +def get_scaled_image(im): + scale_size = args.feat_dim + sx = im.shape[1] # width + sy = im.shape[0] # height + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + im = misc.imresize(im, (nx, ny)) + return im + + +def horizontal_pad(im, allowed_lengths = None): + if allowed_lengths is None: + left_padding = right_padding = args.padding + else: # Find an allowed length for the image + imlen = im.shape[1] # width + allowed_len = 0 + for l in allowed_lengths: + if l > imlen: + allowed_len = l + break + if allowed_len == 0: + # No allowed length was found for the image (the image is too long) + return None + padding = allowed_len - imlen + left_padding = int(padding // 2) + right_padding = padding - left_padding + dim_y = im.shape[0] # height + im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), + dtype=int), im), axis=1) + im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), + dtype=int)), axis=1) + return im_pad1 + +def vertical_shift(im, mode='mid'): + total = args.vertical_shift + if mode == 'notmid': + val = random.randint(0, 1) + if val == 0: + mode = 'top' + else: + mode = 'bottom' + if mode == 'mid': + top = int(total / 2) + bottom = total - top + elif mode == 'top': # more padding on top + top = random.randint(total / 2, total) + bottom = total - top + elif mode == 'bottom': # more padding on bottom + top = random.randint(0, total / 2) + bottom = total - top + width = im.shape[1] + 
im_pad = np.concatenate( + (255 * np.ones((top, width), dtype=int) - + np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) + im_pad = np.concatenate( + (im_pad, 255 * np.ones((bottom, width), dtype=int) - + np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) + return im_pad + +### main ### +random.seed(1) +data_list_path = args.images_scp_path +if args.out_ark == '-': + out_fh = sys.stdout +else: + out_fh = open(args.out_ark,'w') + +allowed_lengths = None +allowed_len_handle = args.allowed_len_file_path +if os.path.isfile(allowed_len_handle): + print("Found 'allowed_lengths.txt' file...", file=sys.stderr) + allowed_lengths = [] + with open(allowed_len_handle) as f: + for line in f: + allowed_lengths.append(int(line.strip())) + print("Read {} allowed lengths and will apply them to the " + "features.".format(len(allowed_lengths)), file=sys.stderr) + +num_fail = 0 +num_ok = 0 +aug_setting = ['mid', 'notmid'] +with open(data_list_path) as f: + for line in f: + line = line.strip() + line_vect = line.split(' ') + image_id = line_vect[0] + image_path = line_vect[1] + im = misc.imread(image_path) + im_scaled = get_scaled_image(im) + im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) + if im_horizontal_padded is None: + num_fail += 1 + continue + if args.augment: + im_shift = vertical_shift(im_horizontal_padded, shift_setting[1]) + else: + im_shift = vertical_shift(im_horizontal_padded, shift_setting[0]) + data = np.transpose(im_shift, (1, 0)) + data = np.divide(data, 255.0) + num_ok += 1 + write_kaldi_matrix(out_fh, data, image_id) + +print('Generated features for {} images. Failed for {} (image too ' + 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_ar/v1/local/tl/prepare_data.sh b/egs/madcat_ar/v1/local/tl/prepare_data.sh new file mode 100755 index 00000000000..5fe41e7cf4c --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/prepare_data.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 + +# This script prepares the training and test data for MADCAT Arabic dataset +# (i.e text, images.scp, utt2spk and spk2utt). It calls process_data.py. + +# Eg. local/prepare_data.sh +# Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 ﻮﺠﻫ ﻮﻌﻘﻟ ﻍﺍﺮﻗ ﺢﺗّﻯ ﺎﻠﻨﺧﺎﻋ +# utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 +# images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 +# data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif + +stage=0 +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits +images_scp_dir=data/local + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +mkdir -p data/{train,test,dev} + +if [ $stage -le 1 ]; then + echo "$0: Processing dev, train and test data...$(date)" + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.dev.raw.lineid data/dev $images_scp_dir/dev/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 + + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.train.raw.lineid data/train $images_scp_dir/train/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 + + local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + + for dataset in dev test train; do + echo "$0: Fixing data directory for dataset: $dataset." + image/fix_data_dir.sh data/$dataset + done +fi diff --git a/egs/madcat_ar/v1/local/tl/process_data.py b/egs/madcat_ar/v1/local/tl/process_data.py new file mode 100755 index 00000000000..c21beb1be70 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/process_data.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Ashish Arora + +""" This script reads MADCAT files and creates the following files (for the + data subset selected via --dataset) :text, utt2spk, images.scp. + Eg. local/process_data.py data/local /export/corpora/LDC/LDC2012T15 /export/corpora/LDC/LDC2013T09 + /export/corpora/LDC/LDC2013T15 data/download/data_splits/madcat.train.raw.lineid + data/dev data/local/lines/images.scp + Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 وجه وعقل غارق حتّى النخاع + utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 + images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 + data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif +""" + +import argparse +import os +import sys +import xml.dom.minidom as minidom +import unicodedata + +parser = argparse.ArgumentParser(description="Creates text, utt2spk and images.scp files", + epilog="E.g. 
" + sys.argv[0] + " data/LDC2012T15" + " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " + " data/train data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('database_path1', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('database_path2', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('database_path3', type=str, + help='Path to the downloaded (and extracted) madcat data') +parser.add_argument('data_splits', type=str, + help='Path to file that contains the train/test/dev split information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files.') +parser.add_argument('images_scp_path', type=str, + help='Path of input images.scp file(maps line image and location)') +parser.add_argument('writing_condition1', type=str, + help='Path to the downloaded (and extracted) writing conditions file 1') +parser.add_argument('writing_condition2', type=str, + help='Path to the downloaded (and extracted) writing conditions file 2') +parser.add_argument('writing_condition3', type=str, + help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") +args = parser.parse_args() + + +def check_file_location(): + """ Returns the complete path of the page image and corresponding + xml file. + Args: + Returns: + image_file_name (string): complete path and name of the page image. + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. + """ + madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') + madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') + madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') + + image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') + image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') + image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') + + if os.path.exists(madcat_file_path1): + return madcat_file_path1, image_file_path1, wc_dict1 + + if os.path.exists(madcat_file_path2): + return madcat_file_path2, image_file_path2, wc_dict2 + + if os.path.exists(madcat_file_path3): + return madcat_file_path3, image_file_path3, wc_dict3 + + return None, None, None + + +def parse_writing_conditions(writing_conditions): + """ Returns a dictionary which have writing condition of each page image. + Args: + writing_conditions(string): complete path of writing condition file. + Returns: + (dict): dictionary with key as page image name and value as writing condition. + """ + with open(writing_conditions) as f: + file_writing_cond = dict() + for line in f: + line_list = line.strip().split("\t") + file_writing_cond[line_list[0]] = line_list[3] + return file_writing_cond + + +def check_writing_condition(wc_dict): + """ Checks if a given page image is writing in a given writing condition. + It is used to create subset of dataset based on writing condition. + Args: + wc_dict (dict): dictionary with key as page image name and value as writing condition. 
+ Returns: + (bool): True if writing condition matches. + """ + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True + + +def read_text(madcat_file_path): + """ Maps every word in the page image to a corresponding line. + Args: + madcat_file_path (string): complete path and name of the madcat xml file + corresponding to the page image. + Returns: + dict: Mapping every word in the page image to a corresponding line. + """ + + word_line_dict = dict() + doc = minidom.parse(madcat_file_path) + zone = doc.getElementsByTagName('zone') + for node in zone: + line_id = node.getAttribute('id') + word_image = node.getElementsByTagName('token-image') + for tnode in word_image: + word_id = tnode.getAttribute('id') + word_line_dict[word_id] = line_id + + text_line_word_dict = dict() + segment = doc.getElementsByTagName('segment') + for node in segment: + token = node.getElementsByTagName('token') + for tnode in token: + ref_word_id = tnode.getAttribute('ref_id') + word = tnode.getElementsByTagName('source')[0].firstChild.nodeValue + ref_line_id = word_line_dict[ref_word_id] + if ref_line_id not in text_line_word_dict: + text_line_word_dict[ref_line_id] = list() + text_line_word_dict[ref_line_id].append(word) + return text_line_word_dict + + +def get_line_image_location(): + image_loc_dict = dict() # Stores image base name and location + image_loc_vect = input_image_fh.read().strip().split("\n") + for line in image_loc_vect: + base_name = os.path.basename(line) + location_vect = line.split('/') + location = "/".join(location_vect[:-1]) + image_loc_dict[base_name]=location + return image_loc_dict + + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +input_image_file = args.images_scp_path +input_image_fh = open(input_image_file, 'r', encoding='utf-8') + +wc_dict1 = parse_writing_conditions(args.writing_condition1) +wc_dict2 = parse_writing_conditions(args.writing_condition2) +wc_dict3 = parse_writing_conditions(args.writing_condition3) +image_loc_dict = get_line_image_location() + +image_num = 0 +with open(args.data_splits) as f: + prev_base_name = '' + for line in f: + base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0] + if prev_base_name != base_name: + prev_base_name = base_name + madcat_xml_path, image_file_path, wc_dict = check_file_location() + if wc_dict is None or not check_writing_condition(wc_dict): + continue + madcat_doc = minidom.parse(madcat_xml_path) + writer = madcat_doc.getElementsByTagName('writer') + writer_id = writer[0].getAttribute('id') + text_line_word_dict = read_text(madcat_xml_path) + base_name = os.path.basename(image_file_path).split('.tif')[0] + for line_id in sorted(text_line_word_dict): + if args.augment: + key = (line_id + '.')[:-1] + for i in range(0, 3): + location_id = '_' + line_id + '_scale' + str(i) + line_image_file_name = base_name + location_id + '.png' + location = image_loc_dict[line_image_file_name] + image_file_path = os.path.join(location, line_image_file_name) + line = text_line_word_dict[key] + text = ' '.join(line) + base_line_image_file_name = line_image_file_name.split('.png')[0] + utt_id = writer_id + '_' + 
str(image_num).zfill(6) + '_' + base_line_image_file_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 + else: + updated_base_name = base_name + '_' + str(line_id).zfill(4) +'.png' + location = image_loc_dict[updated_base_name] + image_file_path = os.path.join(location, updated_base_name) + line = text_line_word_dict[line_id] + text = ' '.join(line) + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(line_id).zfill(4) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 diff --git a/egs/madcat_ar/v1/local/tl/process_waldo_data.py b/egs/madcat_ar/v1/local/tl/process_waldo_data.py new file mode 100755 index 00000000000..df8b6c5149f --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/process_waldo_data.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import argparse +import os +import sys + +parser = argparse.ArgumentParser(description="Creates text, utt2spk and images.scp files", + epilog="E.g. " + sys.argv[0] + " data/train data/local/lines ", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('image_transcription_file', type=str, + help='Path to the file containing line image path and transcription information') +parser.add_argument('out_dir', type=str, + help='directory location to write output files.') +args = parser.parse_args() + + +def read_image_text(image_text_path): + """ Given the file path containing, mapping information of line image + and transcription, it returns a dict. The dict contains this mapping + info. It can be accessed via line_id and will provide transcription. 
+ Returns: + -------- + dict: line_id and transcription mapping + """ + image_transcription_dict = dict() + with open(image_text_path, encoding='utf-8') as f: + for line in f: + line_vect = line.strip().split(' ') + image_path = line_vect[0] + line_id = os.path.basename(image_path).split('.png')[0] + transcription = line_vect[1:] + #transcription = " ".join(transcription) + #image_transcription_dict[line_id] = transcription + joined_transcription = list() + for word in transcription: + joined_transcription.append(word) + joined_transcription = " ".join(joined_transcription) + image_transcription_dict[line_id] = joined_transcription + return image_transcription_dict + + +### main ### +print("Processing '{}' data...".format(args.out_dir)) + +text_file = os.path.join(args.out_dir, 'text') +text_fh = open(text_file, 'w', encoding='utf-8') +utt2spk_file = os.path.join(args.out_dir, 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') +image_file = os.path.join(args.out_dir, 'images.scp') +image_fh = open(image_file, 'w', encoding='utf-8') + +image_transcription_dict = read_image_text(args.image_transcription_file) +for line_id in image_transcription_dict: + writer_id = line_id.strip().split('_')[-3] + updated_line_id = line_id + '.png' + image_file_path = os.path.join('lines', updated_line_id) + text = image_transcription_dict[line_id] + utt_id = line_id + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + diff --git a/egs/madcat_ar/v1/local/tl/run_end2end.sh b/egs/madcat_ar/v1/local/tl/run_end2end.sh new file mode 100755 index 00000000000..1ff5b549180 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/run_end2end.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian +# 2018 Ashish Arora +set -e +stage=0 +nj=30 +# download_dir{1,2,3} points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# This corpus can be purchased here: +# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. +./local/check_tools.sh + +mkdir -p data/{train,test,dev}/data +mkdir -p data/local/{train,test,dev} +if [ $stage -le 0 ]; then + + if [ -f data/train/text ] && ! 
$overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)" + local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ + --download_dir2 $download_dir2 --download_dir3 $download_dir3 + + for dataset in train dev; do + data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ + --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ + --data data/local/$dataset + done + + echo "$0: Preparing data..." + local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --images_scp_dir data/local \ + --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 +fi + +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in dev test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + steps/compute_cmvn_stats.sh data/$set || exit 1; + done + echo "$0: Fixing data directory for train dataset $(date)." + image/fix_data_dir.sh data/train + +fi + +if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 3 ]; then + echo "$0: Preparing BPE..." + cut -d' ' -f2- data/train/text | local/reverse.py | \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + + for set in test train dev; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | local/reverse.py | \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > data/$set/bpe_text + + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + #rm -f data/$set/bpe_text data/$set/ids + done + + echo "$0:Preparing dictionary and lang..." + local/prepare_dict.sh + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 3 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/local/dict/lexicon.txt data/lang_test +fi + +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." + local/chain/run_flatstart_cnn1a.sh --nj $nj +fi + +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." 
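+  # The flat-start (e2e) model trained by run_flatstart_cnn1a.sh in the previous
+  # stage is reused here only to produce alignments of the training data; the next
+  # stage then builds a tree from these alignments and trains the regular chain
+  # model on top of them.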
+ steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" + local/chain/run_cnn_e2eali_1b.sh --nj $nj +fi diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh new file mode 100755 index 00000000000..5d27476d3e1 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian +# 2018 Ashish Arora +set -e +stage=0 +nj=70 +# download_dir{1,2,3} points to the database path on the JHU grid. If you have not +# already downloaded the database you can set it to a local directory +# This corpus can be purchased here: +# https://catalog.ldc.upenn.edu/LDC2012T15, +# https://catalog.ldc.upenn.edu/LDC2013T09/, +# https://catalog.ldc.upenn.edu/LDC2013T15/. +download_dir1=/export/corpora/LDC/LDC2012T15/data +download_dir2=/export/corpora/LDC/LDC2013T09/data +download_dir3=/export/corpora/LDC/LDC2013T15/data +writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab +writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab +writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab +data_splits_dir=data/download/data_splits + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. +. ./path.sh +. ./utils/parse_options.sh # e.g. this parses the above options + # if supplied. +./local/check_tools.sh + +mkdir -p data/{train,test,dev}/data +mkdir -p data/local/{train,test,dev} + +if [ $stage -le 0 ]; then + echo "$0: Downloading data splits..." + echo "Date: $(date)." + local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ + --download_dir2 $download_dir2 --download_dir3 $download_dir3 +fi + +if [ $stage -le 1 ]; then + for dataset in test train dev; do + data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ + --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ + --data data/local/$dataset + done +fi + +if [ $stage -le 2 ]; then + echo "$0: Preparing data..." + local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ + --download_dir3 $download_dir3 --images_scp_dir data/local \ + --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ + --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 +fi + +if [ $stage -le 3 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames" + echo "Date: $(date)." + image/get_image2num_frames.py data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + echo "$0: Obtaining image groups. calling get_allowed_lengths" + echo "Date: $(date)." 
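+  # As used here, 4 is the chain model's frame-subsampling factor and 10 is assumed
+  # to be the spacing (in percent) between consecutive allowed lengths, i.e. the
+  # generated allowed_lengths.txt contains image widths that grow by roughly 10%
+  # per step and stay compatible with subsampling by 4.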
+ image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train +fi + +if [ $stage -le 4 ]; then + for dataset in test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " + echo "Date: $(date)." + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset + steps/compute_cmvn_stats.sh data/$dataset || exit 1; + done + echo "$0: Fixing data directory for train dataset" + echo "Date: $(date)." + utils/fix_data_dir.sh data/train +fi + +if [ $stage -le 5 ]; then + echo "$0: Preparing dictionary and lang..." + cut -d' ' -f2- data/train/text | local/reverse.py | \ + local/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + for set in test train dev; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | local/reverse.py | \ + local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + done + local/prepare_dict.sh + # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. + # So we set --sil-prob to 0.0 + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang +fi + +if [ $stage -le 6 ]; then + echo "$0: Estimating a language model for decoding..." + local/train_lm.sh + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + data/local/dict/lexicon.txt data/lang_test +fi + +if [ $stage -le 7 ]; then + echo "$0: Calling the flat-start chain recipe..." + echo "Date: $(date)." + local/chain/run_flatstart_cnn1a.sh --nj $nj +fi + +if [ $stage -le 8 ]; then + echo "$0: Aligning the training data using the e2e chain model..." + echo "Date: $(date)." + steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --use-gpu false \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train +fi + +if [ $stage -le 9 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments..." + echo "Date: $(date)." + local/chain/run_cnn_e2eali_1b.sh --nj $nj +fi From 1bd1448d1c543de817510fb984d67a571ac4dc59 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 22:22:50 -0400 Subject: [PATCH 16/67] adding gpu = false for alignments in runend2end --- egs/madcat_ar/v1/run_end2end.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index e5ca540d3c1..3986ede9d7f 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -107,6 +107,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Aligning the training data using the e2e chain model...$(date)." 
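+  # Presumably --use-gpu false (added below) is there so that the many parallel
+  # alignment jobs run on CPU nodes instead of competing for GPUs; nnet3 alignment
+  # does not require a GPU.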
steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi From 895342a9a0b5a9ddbfcb02bf55511d1a2f5addc8 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 22:30:39 -0400 Subject: [PATCH 17/67] updating text localization routine --- egs/madcat_ar/v1/local/tl/run_end2end.sh | 124 ------------------ .../v1/local/tl/run_textlocalization.sh | 95 +++++++------- 2 files changed, 46 insertions(+), 173 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/tl/run_end2end.sh diff --git a/egs/madcat_ar/v1/local/tl/run_end2end.sh b/egs/madcat_ar/v1/local/tl/run_end2end.sh deleted file mode 100755 index 1ff5b549180..00000000000 --- a/egs/madcat_ar/v1/local/tl/run_end2end.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian -# 2018 Ashish Arora -set -e -stage=0 -nj=30 -# download_dir{1,2,3} points to the database path on the JHU grid. If you have not -# already downloaded the database you can set it to a local directory -# This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab -writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab -writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab -data_splits_dir=data/download/data_splits - -. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. - ## This relates to the queue. -. ./path.sh -. ./utils/parse_options.sh # e.g. this parses the above options - # if supplied. -./local/check_tools.sh - -mkdir -p data/{train,test,dev}/data -mkdir -p data/local/{train,test,dev} -if [ $stage -le 0 ]; then - - if [ -f data/train/text ] && ! $overwrite; then - echo "$0: Not processing, probably script have run from wrong stage" - echo "Exiting with status 1 to avoid data corruption" - exit 1; - fi - echo "$0: Downloading data splits...$(date)" - local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ - --download_dir2 $download_dir2 --download_dir3 $download_dir3 - - for dataset in train dev; do - data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid - local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ - --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset - done - - echo "$0: Preparing data..." - local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --images_scp_dir data/local \ - --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 -fi - -if [ $stage -le 1 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." - image/get_image2num_frames.py data/train - image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in dev test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. 
$(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set - steps/compute_cmvn_stats.sh data/$set || exit 1; - done - echo "$0: Fixing data directory for train dataset $(date)." - image/fix_data_dir.sh data/train - -fi - -if [ $stage -le 2 ]; then - for set in train; do - echo "$(date) stage 2: Performing augmentation, it will double training data" - local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data - steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; - done -fi - -if [ $stage -le 3 ]; then - echo "$0: Preparing BPE..." - cut -d' ' -f2- data/train/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - - for set in test train dev; do - cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ - utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > data/$set/bpe_text - - mv data/$set/text data/$set/text.old - paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - #rm -f data/$set/bpe_text data/$set/ids - done - - echo "$0:Preparing dictionary and lang..." - local/prepare_dict.sh - utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ - data/local/dict "" data/lang/temp data/lang - utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang -fi - -if [ $stage -le 3 ]; then - echo "$0: Estimating a language model for decoding..." - local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test -fi - -if [ $stage -le 4 ]; then - echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_flatstart_cnn1a.sh --nj $nj -fi - -if [ $stage -le 5 ]; then - echo "$0: Aligning the training data using the e2e chain model...$(date)." - steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train -fi - -if [ $stage -le 6 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali_1b.sh --nj $nj -fi diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 5d27476d3e1..3211e93e120 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -3,13 +3,11 @@ # 2018 Ashish Arora set -e stage=0 -nj=70 +nj=30 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. 
+# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -17,6 +15,7 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits +overwrite=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. @@ -27,16 +26,18 @@ data_splits_dir=data/download/data_splits mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} - if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then - for dataset in test train dev; do + for dataset in train dev; do data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ @@ -44,9 +45,7 @@ if [ $stage -le 1 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$dataset done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --images_scp_dir data/local \ @@ -54,75 +53,73 @@ if [ $stage -le 2 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 fi -if [ $stage -le 3 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames" - echo "Date: $(date)." - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - echo "$0: Obtaining image groups. calling get_allowed_lengths" - echo "Date: $(date)." +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in dev test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + steps/compute_cmvn_stats.sh data/$set || exit 1; + done + echo "$0: Fixing data directory for train dataset $(date)." + image/fix_data_dir.sh data/train + fi -if [ $stage -le 4 ]; then - for dataset in test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " - echo "Date: $(date)." 
- local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - steps/compute_cmvn_stats.sh data/$dataset || exit 1; +if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done - echo "$0: Fixing data directory for train dataset" - echo "Date: $(date)." - utils/fix_data_dir.sh data/train fi -if [ $stage -le 5 ]; then - echo "$0: Preparing dictionary and lang..." +if [ $stage -le 3 ]; then + echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ - local/prepend_words.py | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + #rm -f data/$set/bpe_text data/$set/ids done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 6 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi -if [ $stage -le 7 ]; then - echo "$0: Calling the flat-start chain recipe..." - echo "Date: $(date)." +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_flatstart_cnn1a.sh --nj $nj fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - echo "Date: $(date)." +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - echo "Date: $(date)." 
+if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" local/chain/run_cnn_e2eali_1b.sh --nj $nj fi From a72d9224066396e12149d8d43ca701a79b34c4ea Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 23:04:19 -0400 Subject: [PATCH 18/67] removing unused function --- .../create_line_image_from_page_image.py | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index a91fe55ed3e..b6af4cbe717 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -211,50 +211,6 @@ def get_orientation(origin, p1, p2): return difference -def compute_hull(points): - """ - Given input list of points, return a list of points that - made up the convex hull. - Returns - ------- - [(float, float)]: convexhull points - """ - hull_points = [] - start = points[0] - min_x = start[0] - for p in points[1:]: - if p[0] < min_x: - min_x = p[0] - start = p - - point = start - hull_points.append(start) - - far_point = None - while far_point is not start: - p1 = None - for p in points: - if p is point: - continue - else: - p1 = p - break - - far_point = p1 - - for p2 in points: - if p2 is point or p2 is p1: - continue - else: - direction = get_orientation(point, far_point, p2) - if direction > 0: - far_point = p2 - - hull_points.append(far_point) - point = far_point - return hull_points - - def minimum_bounding_box(points): """ Given a list of 2D points, it returns the minimum area rectangle bounding all the points in the point cloud. @@ -274,7 +230,6 @@ def minimum_bounding_box(points): hull_ordered = [points[index] for index in ConvexHull(points).vertices] hull_ordered.append(hull_ordered[0]) - #hull_ordered = compute_hull(points) hull_ordered = tuple(hull_ordered) min_rectangle = bounding_area(0, hull_ordered) From b2ef92343fbe23fadef604d99de0546f8dd09154 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 23:09:29 -0400 Subject: [PATCH 19/67] minor change --- egs/madcat_ar/v1/local/create_line_image_from_page_image.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index b6af4cbe717..34e339f1877 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -500,7 +500,6 @@ def check_writing_condition(wc_dict, base_name): return True ### main ### - def main(): wc_dict1 = parse_writing_conditions(args.writing_condition1) @@ -520,8 +519,7 @@ def main(): madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) if wc_dict is None or not check_writing_condition(wc_dict, base_name): continue - if madcat_file_path is not None: - get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) + get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) if __name__ == '__main__': From b8974aae6011b30b6fbe746693c8dc05f28f6b47 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Mon, 17 Sep 2018 23:56:02 -0400 Subject: [PATCH 20/67] adding option for augmentation --- ...t => create_line_image_from_page_image.py} | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) rename 
egs/madcat_ar/v1/local/tl/{create_line_image_from_page_image.py.augment => create_line_image_from_page_image.py} (89%) diff --git a/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py similarity index 89% rename from egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment rename to egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py index faf0d3503c7..bb126c39538 100755 --- a/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py.augment +++ b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py @@ -52,6 +52,8 @@ help='padding across horizontal/verticle direction') parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, help="only processes subset of data based on writing condition") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() """ @@ -401,10 +403,48 @@ def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh) minimum_bounding_box_input.append(word_coordinate) updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] - for i in range(0, 3): - additional_pixel = random.randint(1, args.pixel_scaling) - mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) - bounding_box = minimum_bounding_box(mar) + if args.augment: + for i in range(0, 3): + additional_pixel = random.randint(1, args.pixel_scaling) + mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) + bounding_box = minimum_bounding_box(mar) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + cropped_bounding_box, get_center(region_initial)) + + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + line_id = id + '_scale' + str(i) + set_line_image_data(region_final, line_id, image_file_name, image_fh) + else: + bounding_box = minimum_bounding_box(points_ordered) (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) @@ -438,8 +478,7 
@@ def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh) max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) box = (min_x, min_y, max_x, max_y) region_final = img2.crop(box) - line_id = id + '_scale' + str(i) - set_line_image_data(region_final, line_id, image_file_name, image_fh) + set_line_image_data(region_final, id, image_file_name, image_fh) def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): @@ -501,7 +540,6 @@ def check_writing_condition(wc_dict, base_name): return True ### main ### - def main(): wc_dict1 = parse_writing_conditions(args.writing_condition1) @@ -521,8 +559,7 @@ def main(): madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) if wc_dict is None or not check_writing_condition(wc_dict, base_name): continue - if madcat_file_path is not None: - get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) + get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) if __name__ == '__main__': From 04b938c01d38cbbcae5801d6e4391eb4d831ecf3 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 18 Sep 2018 00:03:44 -0400 Subject: [PATCH 21/67] updating text localization routines --- ...t => create_line_image_from_page_image.py} | 73 ++++-- .../v1/local/tl/{imp => }/make_features.py | 0 .../tl/not_much_imp/run_cnn_e2eali_1b.sh | 246 ------------------ .../v1/local/tl/not_much_imp/run_end2end.sh | 124 --------- .../tl/not_much_imp/run_flatstart_cnn1a.sh | 168 ------------ .../tl/{not_much_imp => }/prepare_data.sh | 0 .../v1/local/tl/{imp => }/process_data.py | 0 .../local/tl/{imp => }/process_waldo_data.py | 0 .../v1/local/tl/run_textlocalization.sh | 95 ++++--- 9 files changed, 102 insertions(+), 604 deletions(-) rename egs/madcat_ar/v1/local/tl/{imp/create_line_image_from_page_image.py.augment => create_line_image_from_page_image.py} (87%) rename egs/madcat_ar/v1/local/tl/{imp => }/make_features.py (100%) delete mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh delete mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh delete mode 100755 egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh rename egs/madcat_ar/v1/local/tl/{not_much_imp => }/prepare_data.sh (100%) rename egs/madcat_ar/v1/local/tl/{imp => }/process_data.py (100%) rename egs/madcat_ar/v1/local/tl/{imp => }/process_waldo_data.py (100%) diff --git a/egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py similarity index 87% rename from egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment rename to egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py index da2b0f0a62f..bb126c39538 100755 --- a/egs/madcat_ar/v1/local/tl/imp/create_line_image_from_page_image.py.augment +++ b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py @@ -50,6 +50,10 @@ help='padding across horizontal/verticle direction') parser.add_argument('--pixel-scaling', type=int, default=30, help='padding across horizontal/verticle direction') +parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, + help="only processes subset of data based on writing condition") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() """ @@ -331,7 +335,8 @@ def update_minimum_bounding_box_input(bounding_box_input): def dilate_polygon(points, 
amount_increase): - """ Increases size of polygon given as a list of tuples. Assumes points in polygon are given in CCW + """ Increases size of polygon given as a list of tuples. + Assumes points in polygon are given in CCW """ expanded_points = [] for index, point in enumerate(points): @@ -398,10 +403,48 @@ def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh) minimum_bounding_box_input.append(word_coordinate) updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] - for i in range(0, 3): - additional_pixel = random.randint(1, args.pixel_scaling) - mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) - bounding_box = minimum_bounding_box(mar) + if args.augment: + for i in range(0, 3): + additional_pixel = random.randint(1, args.pixel_scaling) + mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) + bounding_box = minimum_bounding_box(mar) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + cropped_bounding_box, get_center(region_initial)) + + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + line_id = id + '_scale' + str(i) + set_line_image_data(region_final, line_id, image_file_name, image_fh) + else: + bounding_box = minimum_bounding_box(points_ordered) (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) @@ -435,8 +478,7 @@ def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh) max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) box = (min_x, min_y, max_x, max_y) region_final = img2.crop(box) - line_id = id + '_scale' + str(i) - set_line_image_data(region_final, line_id, image_file_name, image_fh) + set_line_image_data(region_final, id, image_file_name, image_fh) def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): @@ -490,16 +532,14 @@ def check_writing_condition(wc_dict, base_name): Returns (bool): True if writing condition matches. 
""" - #return True - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False - - return True - + if args.subset: + writing_condition = wc_dict[base_name].strip() + if writing_condition != 'IUC': + return False + else: + return True ### main ### - def main(): wc_dict1 = parse_writing_conditions(args.writing_condition1) @@ -519,8 +559,7 @@ def main(): madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) if wc_dict is None or not check_writing_condition(wc_dict, base_name): continue - if madcat_file_path is not None: - get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) + get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) if __name__ == '__main__': diff --git a/egs/madcat_ar/v1/local/tl/imp/make_features.py b/egs/madcat_ar/v1/local/tl/make_features.py similarity index 100% rename from egs/madcat_ar/v1/local/tl/imp/make_features.py rename to egs/madcat_ar/v1/local/tl/make_features.py diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh deleted file mode 100755 index f44b12667e9..00000000000 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_cnn_e2eali_1b.sh +++ /dev/null @@ -1,246 +0,0 @@ -#!/bin/bash - -# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the -# lattice alignments and to build a tree - -# local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b -# System cnn_e2eali_1b -# WER 10.78 -# CER 2.99 -# Final train prob -0.0587 -# Final valid prob -0.0609 -# Final train prob (xent) -0.4471 -# Final valid prob (xent) -0.4653 -# Parameters 3.37M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b -#exp/chain/cnn_e2eali_1b: num-iters=179 nj=8..16 num-params=3.4M dim=40->416 combine=-0.058->-0.058 (over 3) xent:train/valid[118,178,final]=(-0.463,-0.445,-0.447/-0.477,-0.462,-0.465) logprob:train/valid[118,178,final]=(-0.062,-0.059,-0.059/-0.063,-0.061,-0.061) - -set -e -o pipefail - -stage=0 - -nj=30 -train_set=train -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options -chunk_width=340,300,200,100 -num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 -tdnn_dim=450 -# training options -srand=0 -remove_egs=true -lang_test=lang_test -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $e2echain_model_dir $lat_dir - echo "" >$lat_dir/splice_opts - -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. 
- if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=56 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=2 \ - --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=3 \ - --trainer.optimization.num-jobs-final=16 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh deleted file mode 100755 index 1ff5b549180..00000000000 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_end2end.sh +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian -# 2018 Ashish Arora -set -e -stage=0 -nj=30 -# download_dir{1,2,3} points to the database path on the JHU grid. 
If you have not -# already downloaded the database you can set it to a local directory -# This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab -writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab -writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab -data_splits_dir=data/download/data_splits - -. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. - ## This relates to the queue. -. ./path.sh -. ./utils/parse_options.sh # e.g. this parses the above options - # if supplied. -./local/check_tools.sh - -mkdir -p data/{train,test,dev}/data -mkdir -p data/local/{train,test,dev} -if [ $stage -le 0 ]; then - - if [ -f data/train/text ] && ! $overwrite; then - echo "$0: Not processing, probably script have run from wrong stage" - echo "Exiting with status 1 to avoid data corruption" - exit 1; - fi - echo "$0: Downloading data splits...$(date)" - local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ - --download_dir2 $download_dir2 --download_dir3 $download_dir3 - - for dataset in train dev; do - data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid - local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ - --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset - done - - echo "$0: Preparing data..." - local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --images_scp_dir data/local \ - --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 -fi - -if [ $stage -le 1 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." - image/get_image2num_frames.py data/train - image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in dev test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set - steps/compute_cmvn_stats.sh data/$set || exit 1; - done - echo "$0: Fixing data directory for train dataset $(date)." - image/fix_data_dir.sh data/train - -fi - -if [ $stage -le 2 ]; then - for set in train; do - echo "$(date) stage 2: Performing augmentation, it will double training data" - local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data - steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; - done -fi - -if [ $stage -le 3 ]; then - echo "$0: Preparing BPE..." 
- cut -d' ' -f2- data/train/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - - for set in test train dev; do - cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ - utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ - | sed 's/@@//g' > data/$set/bpe_text - - mv data/$set/text data/$set/text.old - paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - #rm -f data/$set/bpe_text data/$set/ids - done - - echo "$0:Preparing dictionary and lang..." - local/prepare_dict.sh - utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ - data/local/dict "" data/lang/temp data/lang - utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang -fi - -if [ $stage -le 3 ]; then - echo "$0: Estimating a language model for decoding..." - local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test -fi - -if [ $stage -le 4 ]; then - echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_flatstart_cnn1a.sh --nj $nj -fi - -if [ $stage -le 5 ]; then - echo "$0: Aligning the training data using the e2e chain model...$(date)." - steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train -fi - -if [ $stage -le 6 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali_1b.sh --nj $nj -fi diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh b/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh deleted file mode 100755 index 4893dcfea08..00000000000 --- a/egs/madcat_ar/v1/local/tl/not_much_imp/run_flatstart_cnn1a.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian - -# This script does end2end chain training (i.e. from scratch) - -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a -# System e2e_cnn_1a -# WER 10.71 -# CER 2.85 -# Final train prob -0.0859 -# Final valid prob -0.1266 -# Final train prob (xent) -# Final valid prob (xent) -# Parameters 2.94M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) - -set -e - - -# configs for 'chain' -stage=0 -nj=30 -train_stage=-10 -get_egs_stage=-10 -affix=1a - -# training options -tdnn_dim=450 -num_epochs=2 -num_jobs_initial=3 -num_jobs_final=16 -minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 -common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=1000000 -cmvn_opts="--norm-means=false --norm-vars=false" -train_set=train -lang_test=lang_test - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! 
cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ - --shared-phones true \ - --type mono \ - data/$train_set $lang $treedir - $cmd $treedir/log/make_phone_lm.log \ - cat data/$train_set/text \| \ - steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ - utils/sym2int.pl -f 2- data/lang/phones.txt \| \ - chain-est-phone-lm --num-extra-lm-states=500 \ - ark:- $treedir/phone_lm.fst -fi - -if [ $stage -le 2 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=56 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs -fi - -if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. 
- - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ - --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ - --chain.apply-deriv-weights false \ - --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --trainer.add-option="--optimization.memory-compression-level=2" \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh b/egs/madcat_ar/v1/local/tl/prepare_data.sh similarity index 100% rename from egs/madcat_ar/v1/local/tl/not_much_imp/prepare_data.sh rename to egs/madcat_ar/v1/local/tl/prepare_data.sh diff --git a/egs/madcat_ar/v1/local/tl/imp/process_data.py b/egs/madcat_ar/v1/local/tl/process_data.py similarity index 100% rename from egs/madcat_ar/v1/local/tl/imp/process_data.py rename to egs/madcat_ar/v1/local/tl/process_data.py diff --git a/egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py b/egs/madcat_ar/v1/local/tl/process_waldo_data.py similarity index 100% rename from egs/madcat_ar/v1/local/tl/imp/process_waldo_data.py rename to egs/madcat_ar/v1/local/tl/process_waldo_data.py diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 5d27476d3e1..3211e93e120 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -3,13 +3,11 @@ # 2018 Ashish Arora set -e stage=0 -nj=70 +nj=30 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: -# https://catalog.ldc.upenn.edu/LDC2012T15, -# https://catalog.ldc.upenn.edu/LDC2013T09/, -# https://catalog.ldc.upenn.edu/LDC2013T15/. 
+# https://catalog.ldc.upenn.edu/{LDC2012T15,LDC2013T09/,LDC2013T15/} download_dir1=/export/corpora/LDC/LDC2012T15/data download_dir2=/export/corpora/LDC/LDC2013T09/data download_dir3=/export/corpora/LDC/LDC2013T15/data @@ -17,6 +15,7 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits +overwrite=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. @@ -27,16 +26,18 @@ data_splits_dir=data/download/data_splits mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} - if [ $stage -le 0 ]; then - echo "$0: Downloading data splits..." - echo "Date: $(date)." + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then - for dataset in test train dev; do + for dataset in train dev; do data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ @@ -44,9 +45,7 @@ if [ $stage -le 1 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$dataset done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --images_scp_dir data/local \ @@ -54,75 +53,73 @@ if [ $stage -le 2 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 fi -if [ $stage -le 3 ]; then - echo "$0: Obtaining image groups. calling get_image2num_frames" - echo "Date: $(date)." - image/get_image2num_frames.py data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train - # which will be used by local/make_features.py to enforce the images to - # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - echo "$0: Obtaining image groups. calling get_allowed_lengths" - echo "Date: $(date)." +if [ $stage -le 1 ]; then + echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." + image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + for set in dev test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + steps/compute_cmvn_stats.sh data/$set || exit 1; + done + echo "$0: Fixing data directory for train dataset $(date)." + image/fix_data_dir.sh data/train + fi -if [ $stage -le 4 ]; then - for dataset in test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. " - echo "Date: $(date)." 
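The removed comments above describe what image/get_allowed_lengths.py produces: an allowed_lengths.txt listing target line-image widths spaced roughly 10% apart (the trailing "10" in the call), which make_features.py then uses when padding images. A rough sketch of that idea, not the actual script (the real one derives the minimum and maximum widths from the image2num_frames statistics, and the rounding to a multiple of the frame-subsampling factor of 4 is an assumption here):

    def allowed_lengths(min_len, max_len, spacing=0.10, subsampling=4):
        """Candidate widths from about min_len to max_len, spaced ~10% apart
        and rounded up to a multiple of the frame-subsampling factor."""
        lengths, current = [], float(min_len)
        while current <= max_len:
            rounded = int(-(-current // subsampling)) * subsampling  # ceil to a multiple of 4
            if not lengths or rounded != lengths[-1]:
                lengths.append(rounded)
            current *= 1.0 + spacing
        return lengths

    # allowed_lengths(100, 200) -> [100, 112, 124, 136, 148, 164, 180, 196]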
- local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - steps/compute_cmvn_stats.sh data/$dataset || exit 1; +if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done - echo "$0: Fixing data directory for train dataset" - echo "Date: $(date)." - utils/fix_data_dir.sh data/train fi -if [ $stage -le 5 ]; then - echo "$0: Preparing dictionary and lang..." +if [ $stage -le 3 ]; then + echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ - local/prepend_words.py | \ - utils/lang/bpe/learn_bpe.py -s 700 > data/train/bpe.out + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - local/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/train/bpe.out \ + utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text + mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + #rm -f data/$set/bpe_text data/$set/ids done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - # This recipe uses byte-pair encoding, the silences are part of the words' pronunciations. - # So we set --sil-prob to 0.0 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ data/local/dict "" data/lang/temp data/lang utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 6 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/local/dict/lexicon.txt data/lang_test fi -if [ $stage -le 7 ]; then - echo "$0: Calling the flat-start chain recipe..." - echo "Date: $(date)." +if [ $stage -le 4 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." local/chain/run_flatstart_cnn1a.sh --nj $nj fi -if [ $stage -le 8 ]; then - echo "$0: Aligning the training data using the e2e chain model..." - echo "Date: $(date)." +if [ $stage -le 5 ]; then + echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 9 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - echo "Date: $(date)." 
+if [ $stage -le 6 ]; then + echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" local/chain/run_cnn_e2eali_1b.sh --nj $nj fi From 92a470da6c151e23c872e0760a99b8f730439132 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 14:12:27 -0400 Subject: [PATCH 22/67] removing unnecessary files --- .../v1/local/chain/run_flatstart_cnn1b.sh | 164 ------------------ egs/madcat_ar/v1/path.sh | 1 - egs/madcat_ar/v1/run_end2end.sh | 1 + egs/wsj/s5/utils/lang/bpe/prepend_words.py | 11 +- egs/wsj/s5/utils/lang/make_lexicon_fst.py | 2 +- 5 files changed, 5 insertions(+), 174 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh diff --git a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh b/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh deleted file mode 100755 index 901903a9bba..00000000000 --- a/egs/madcat_ar/v1/local/chain/run_flatstart_cnn1b.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash -# Copyright 2017 Hossein Hadian - -# This script does end2end chain training (i.e. from scratch) - -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a -# System e2e_cnn_1a -# WER 10.71 -# CER 2.85 -# Final train prob -0.0859 -# Final valid prob -0.1266 -# Final train prob (xent) -# Final valid prob (xent) -# Parameters 2.94M - -# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) - -set -e - -# configs for 'chain' -stage=0 -nj=70 -train_stage=-10 -get_egs_stage=-10 -affix=1b - -# training options -tdnn_dim=550 -minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 -common_egs_dir= -cmvn_opts="--norm-means=true --norm-vars=true" -train_set=train -lang_test=lang_test -dropout_schedule='0,0@0.20,0.2@0.50,0' -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! 
cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ - --shared-phones true \ - --type mono \ - data/$train_set $lang $treedir - $cmd $treedir/log/make_phone_lm.log \ - cat data/$train_set/text \| \ - steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ - utils/sym2int.pl -f 2- data/lang/phones.txt \| \ - chain-est-phone-lm --num-extra-lm-states=500 \ - ark:- $treedir/phone_lm.fst -fi - -if [ $stage -le 2 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 - conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 - relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 - relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim dropout-proportion=0.0 - - ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs -fi - -if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. 
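The deleted 1b setup adds conv-relu-batchnorm-dropout layers and a dropout_schedule='0,0@0.20,0.2@0.50,0' option that is passed to train_e2e.py just below via --trainer.dropout-schedule. Kaldi interprets such a schedule as a piecewise-linear function of training progress; the following small sketch only makes the numbers concrete and is an approximation, not the real parsing code in the training scripts:

    def dropout_at(progress, schedule="0,0@0.20,0.2@0.50,0"):
        """Dropout proportion at a training progress in [0, 1]: 0 until 20% of
        training, rising linearly to 0.2 at 50%, then back to 0 at the end."""
        points = []
        for i, entry in enumerate(schedule.split(',')):
            if '@' in entry:
                value, frac = entry.split('@')
            else:
                value, frac = entry, 0.0 if i == 0 else 1.0
            points.append((float(frac), float(value)))
        points.sort()
        for (x0, y0), (x1, y1) in zip(points, points[1:]):
            if x0 <= progress <= x1:
                return y0 + (y1 - y0) * (progress - x0) / max(x1 - x0, 1e-9)
        return points[-1][1]

    # dropout_at(0.35) -> 0.1, i.e. halfway up the ramp between 20% and 50% of training.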
- - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ - --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ - --chain.apply-deriv-weights false \ - --egs.dir "$common_egs_dir" \ - --egs.stage $get_egs_stage \ - --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ - --chain.frame-subsampling-factor 4 \ - --chain.alignment-subsampling-factor 4 \ - --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --trainer.add-option="--optimization.memory-compression-level=2" \ - --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter 2000000 \ - --trainer.num-epochs 2 \ - --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial 6 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.shrink-value 1.0 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ - --tree-dir $treedir \ - --dir $dir || exit 1; -fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/path.sh b/egs/madcat_ar/v1/path.sh index 252d4ab04fe..2d17b17a84a 100755 --- a/egs/madcat_ar/v1/path.sh +++ b/egs/madcat_ar/v1/path.sh @@ -3,5 +3,4 @@ export KALDI_ROOT=`pwd`/../../.. export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh -export CUDA_CACHE_DISABLE=1 export LC_ALL=C diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index e5ca540d3c1..3986ede9d7f 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -107,6 +107,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/wsj/s5/utils/lang/bpe/prepend_words.py b/egs/wsj/s5/utils/lang/bpe/prepend_words.py index d497344e850..face771c7ca 100755 --- a/egs/wsj/s5/utils/lang/bpe/prepend_words.py +++ b/egs/wsj/s5/utils/lang/bpe/prepend_words.py @@ -4,16 +4,11 @@ # the beginning of the words for finding the initial-space of every word # after decoding. 
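The header comment above states the whole contract of prepend_words.py: as the loop in the hunk below shows, every word gets a leading '|' so that, after the text has been cut into BPE pieces (and the '@@' continuation markers stripped with sed 's/@@//g' elsewhere in the recipe), the decoder output can be re-segmented into words at the '|' marks. A tiny illustration with a made-up Latin-script line (the real data is Arabic and also goes through reverse.py first):

    line = "he said hello"                           # made-up transcript line
    marked = ' '.join("|" + w for w in line.split())
    print(marked)                                    # -> "|he |said |hello"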
-import argparse import sys, io -parser = argparse.ArgumentParser(description="Prepends '|' to the beginning of every word") -parser.add_argument('--encoding', type=str, default='latin-1', - help='Type of encoding') -args = parser.parse_args() - -infile = io.TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) -output = io.TextIOWrapper(sys.stdout.buffer, encoding=args.encoding) +infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1') +output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1') for line in infile: output.write(' '.join([ "|"+word for word in line.split()]) + '\n') + diff --git a/egs/wsj/s5/utils/lang/make_lexicon_fst.py b/egs/wsj/s5/utils/lang/make_lexicon_fst.py index 89c50b2f069..67ed0ac2789 100755 --- a/egs/wsj/s5/utils/lang/make_lexicon_fst.py +++ b/egs/wsj/s5/utils/lang/make_lexicon_fst.py @@ -72,7 +72,7 @@ def read_lexiconp(filename): with open(filename, 'r', encoding='latin-1') as f: whitespace = re.compile("[ \t]+") for line in f: - a = whitespace.split(line.rstrip('\n')) + a = whitespace.split(line.strip()) if len(a) < 2: print("{0}: error: found bad line '{1}' in lexicon file {2} ".format( sys.argv[0], line.strip(), filename), file=sys.stderr) From e7b7597301ce59eb1e1ed9833cf0853c12d82c17 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:23:45 -0400 Subject: [PATCH 23/67] adding lm rescoring, cleaning in chain scripts --- .../v1/local/chain/tuning/run_cnn_1a.sh | 11 +- .../local/chain/tuning/run_cnn_chainali_1a.sh | 11 +- .../local/chain/tuning/run_cnn_e2eali_1b.sh | 9 +- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 8 +- egs/madcat_ar/v1/local/extract_features.sh | 6 +- egs/madcat_ar/v1/local/make_features.py | 138 ------------------ egs/madcat_ar/v1/run_end2end.sh | 6 +- 7 files changed, 40 insertions(+), 149 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/make_features.py diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh index a3a98ce5ad5..02d095b3a82 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh @@ -32,7 +32,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -207,7 +208,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -221,4 +222,10 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh index b652eab034a..5faf6a73691 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -29,7 +29,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. 
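The one-line make_lexicon_fst.py change at the end of the previous patch (line.rstrip('\n') becoming line.strip()) looks cosmetic, but it matters for lexicon files with trailing whitespace or Windows-style line endings, where rstrip('\n') leaves a carriage return glued to the last phone; strip() also drops leading whitespace, which rstrip did not. A made-up lexiconp entry shows the difference:

    import re
    whitespace = re.compile("[ \t]+")
    line = "word\t1.0\tw o r d\r\n"                       # hypothetical entry saved with a CRLF ending
    print(repr(whitespace.split(line.rstrip('\n'))[-1]))  # 'd\r'  (stray carriage return kept)
    print(repr(whitespace.split(line.strip())[-1]))       # 'd'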
echo "$0 $@" # Print the command line for logging @@ -209,7 +210,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -223,4 +224,10 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 55df0cad4b7..b0b77be2a18 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -42,7 +42,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=true -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -240,4 +241,10 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 033cb88df10..bf215a0cae2 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -36,7 +36,8 @@ l2_regularize=0.00005 frames_per_iter=2000000 cmvn_opts="--norm-means=true --norm-vars=true" train_set=train -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -152,7 +153,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -161,6 +162,9 @@ if [ $stage -le 5 ]; then steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 56a8443e328..4ed6ba04348 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -9,6 +9,8 @@ nj=4 cmd=run.pl feat_dim=40 +augment=false +fliplr=false echo "$0 $@" . 
./cmd.sh @@ -34,9 +36,9 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - local/make_features.py $logdir/images.JOB.scp \ + image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim \| \ + --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py deleted file mode 100755 index a21276d32c2..00000000000 --- a/egs/madcat_ar/v1/local/make_features.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2018 Hossein Hadian - -""" This script converts images to Kaldi-format feature matrices. The input to - this script is the path to a data directory, e.g. "data/train". This script - reads the images listed in images.scp and writes them to standard output - (by default) as Kaldi-formatted matrices (in text form). It also scales the - images so they have the same height (via --feat-dim). It can optionally pad - the images (on left/right sides) with white pixels. - If an 'image2num_frames' file is found in the data dir, it will be used - to enforce the images to have the specified length in that file by padding - white pixels (the --padding option will be ignored in this case). This relates - to end2end chain training. - - eg. local/make_features.py data/train --feat-dim 40 -""" - -import argparse -import os -import sys -import numpy as np -from scipy import misc - -parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and - writes them to standard output in text format.""") -parser.add_argument('images_scp_path', type=str, - help='Path of images.scp file') -parser.add_argument('--allowed_len_file_path', type=str, default=None, - help='If supplied, each images will be padded to reach the ' - 'target length (this overrides --padding).') -parser.add_argument('--out-ark', type=str, default='-', - help='Where to write the output feature file') -parser.add_argument('--feat-dim', type=int, default=40, - help='Size to scale the height of all images') -parser.add_argument('--padding', type=int, default=5, - help='Number of white pixels to pad on the left' - 'and right side of the image.') - - -args = parser.parse_args() - - -def write_kaldi_matrix(file_handle, matrix, key): - file_handle.write(key + " [ ") - num_rows = len(matrix) - if num_rows == 0: - raise Exception("Matrix is empty") - num_cols = len(matrix[0]) - - for row_index in range(len(matrix)): - if num_cols != len(matrix[row_index]): - raise Exception("All the rows of a matrix are expected to " - "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) - if row_index != num_rows - 1: - file_handle.write("\n") - file_handle.write(" ]\n") - - -def get_scaled_image(im): - scale_size = args.feat_dim - sx = im.shape[1] # width - sy = im.shape[0] # height - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) - return im - - -def horizontal_pad(im, allowed_lengths = None): - if allowed_lengths is None: - left_padding = right_padding = args.padding - else: # Find an allowed length for the image - imlen = im.shape[1] # width - allowed_len = 0 - for l in allowed_lengths: - if l > imlen: - allowed_len = l - break - if 
allowed_len == 0: - # No allowed length was found for the image (the image is too long) - return None - padding = allowed_len - imlen - left_padding = int(padding // 2) - right_padding = padding - left_padding - dim_y = im.shape[0] # height - im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), - dtype=int), im), axis=1) - im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), - dtype=int)), axis=1) - return im_pad1 - - -### main ### - -data_list_path = args.images_scp_path - -if args.out_ark == '-': - out_fh = sys.stdout -else: - out_fh = open(args.out_ark,'wb') - -allowed_lengths = None -allowed_len_handle = args.allowed_len_file_path -if os.path.isfile(allowed_len_handle): - print("Found 'allowed_lengths.txt' file...", file=sys.stderr) - allowed_lengths = [] - with open(allowed_len_handle) as f: - for line in f: - allowed_lengths.append(int(line.strip())) - print("Read {} allowed lengths and will apply them to the " - "features.".format(len(allowed_lengths)), file=sys.stderr) - -num_fail = 0 -num_ok = 0 -with open(data_list_path) as f: - for line in f: - line = line.strip() - line_vect = line.split(' ') - image_id = line_vect[0] - image_path = line_vect[1] - im = misc.imread(image_path) - im_scaled = get_scaled_image(im) - im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) - if im_horizontal_padded is None: - num_fail += 1 - continue - data = np.transpose(im_horizontal_padded, (1, 0)) - data = np.divide(data, 255.0) - num_ok += 1 - write_kaldi_matrix(out_fh, data, image_id) - -print('Generated features for {} images. Failed for {} (image too ' - 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 3986ede9d7f..a24b851331b 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -95,8 +95,10 @@ fi if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g fi if [ $stage -le 4 ]; then From e647607d495eee3115fb1f799be5f4c897cd39d4 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:27:59 -0400 Subject: [PATCH 24/67] minor fix --- .../v1/local/chain/tuning/run_cnn_e2eali_1a.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 38387ce2fcc..38de5fe3b7c 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -29,7 +29,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=true -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -213,7 +214,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. 
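The local/make_features.py removed above is replaced in extract_features.sh by the shared image/ocr/make_features.py; assuming the shared script keeps the same allowed-lengths behaviour, the padding decision it encodes is: pad each line image with white pixels up to the first allowed length longer than it, split evenly between the left and right edges, and drop images longer than every allowed length. A minimal sketch of just that decision:

    def split_padding(width, allowed_lengths):
        """First allowed length longer than the image, with the white padding
        split between the left and right edges; None if the image is too long."""
        target = next((l for l in allowed_lengths if l > width), None)
        if target is None:
            return None
        pad = target - width
        return pad // 2, pad - pad // 2

    # split_padding(130, [100, 112, 124, 136, 148]) -> (3, 3)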
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -227,4 +228,10 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir From a0d2b6879d1ea061ac801bb8291acdf3104018af Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:31:43 -0400 Subject: [PATCH 25/67] removing prepend words --- egs/madcat_ar/v1/local/prepend_words.py | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/prepend_words.py diff --git a/egs/madcat_ar/v1/local/prepend_words.py b/egs/madcat_ar/v1/local/prepend_words.py deleted file mode 100755 index d53eb8974bf..00000000000 --- a/egs/madcat_ar/v1/local/prepend_words.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# This script, prepend '|' to every words in the transcript to mark -# the beginning of the words for finding the initial-space of every word -# after decoding. - -import sys, io - -infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') -output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') -for line in infile: - output.write(' '.join(["|" + word for word in line.split()]) + '\n') From 53edde450b1803a60d0cb7f28b4448bd52f0abff Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:33:14 -0400 Subject: [PATCH 26/67] minor bug fix --- egs/madcat_ar/v1/run_end2end.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index a24b851331b..859cced6c17 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -70,13 +70,13 @@ fi if [ $stage -le 2 ]; then echo "$0: Preparing BPE..." cut -d' ' -f2- data/train/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text From e9ae85369f7723e2379cc6cea4c568ffa6a41c6a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:51:43 -0400 Subject: [PATCH 27/67] fixing run.sh --- egs/madcat_ar/v1/run.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index f6a63320497..2e2d0d7af90 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -83,8 +83,10 @@ fi if [ $stage -le 5 ]; then echo "$0: Estimating a language model for decoding..." 
local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g fi if [ $stage -le 6 ]; then From 8d0c7930a20eba30bc654d16a00e09ec966a7f04 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:57:38 -0400 Subject: [PATCH 28/67] removing prepare data --- egs/madcat_ar/v1/local/prepare_data.sh | 53 -------------------------- egs/madcat_ar/v1/run_end2end.sh | 12 ++++-- 2 files changed, 8 insertions(+), 57 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/prepare_data.sh diff --git a/egs/madcat_ar/v1/local/prepare_data.sh b/egs/madcat_ar/v1/local/prepare_data.sh deleted file mode 100755 index d808d736845..00000000000 --- a/egs/madcat_ar/v1/local/prepare_data.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2017 Hossein Hadian -# Apache 2.0 - -# This script prepares the training and test data for MADCAT Arabic dataset -# (i.e text, images.scp, utt2spk and spk2utt). It calls process_data.py. - -# Eg. local/prepare_data.sh -# Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 ﻮﺠﻫ ﻮﻌﻘﻟ ﻍﺍﺮﻗ ﺢﺗّﻯ ﺎﻠﻨﺧﺎﻋ -# utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 -# images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 -# data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif - -stage=0 -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab -writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab -writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab -data_splits_dir=data/download/data_splits -images_scp_dir=data/local - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh || exit 1; - -mkdir -p data/{train,test,dev} - -if [ $stage -le 1 ]; then - echo "$0: Processing dev, train and test data..." - echo "Date: $(date)." - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.dev.raw.lineid data/dev $images_scp_dir/dev/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 - - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.test.raw.lineid data/test $images_scp_dir/test/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 - - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.train.raw.lineid data/train $images_scp_dir/train/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 - - for dataset in dev test train; do - echo "$0: Fixing data directory for dataset: $dataset" - echo "Date: $(date)." - image/fix_data_dir.sh data/$dataset - done -fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 859cced6c17..48832e3159b 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -47,10 +47,14 @@ if [ $stage -le 0 ]; then done echo "$0: Preparing data..." 
- local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --images_scp_dir data/local \ - --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 + for set in dev train test; do + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 + data/local/splits/${set}.txt data/${set} + image/fix_data_dir.sh data/${set} + done + fi if [ $stage -le 1 ]; then From ee582d50d6c46f4bd9293f1bf4243c65f7c54542 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 15:59:35 -0400 Subject: [PATCH 29/67] fixing run.sh --- egs/madcat_ar/v1/run.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index 2e2d0d7af90..7922bf30ed6 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -57,10 +57,13 @@ fi if [ $stage -le 2 ]; then echo "$0: Preparing data..." - local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --images_scp_dir data/local \ - --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 + for set in dev train test; do + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 + data/local/splits/${set}.txt data/${set} + image/fix_data_dir.sh data/${set} + done fi mkdir -p data/{train,test,dev}/data From a16a11d8145d323254a00fd088187994eca96d0f Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 16:02:30 -0400 Subject: [PATCH 30/67] removing reverse.py --- egs/madcat_ar/v1/run_end2end.sh | 4 ++-- egs/{madcat_ar/v1/local => wsj/s5/utils/lang/bpe}/reverse.py | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename egs/{madcat_ar/v1/local => wsj/s5/utils/lang/bpe}/reverse.py (100%) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 48832e3159b..6ab6e8ff32d 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -73,13 +73,13 @@ fi if [ $stage -le 2 ]; then echo "$0: Preparing BPE..." 
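One thing to double-check in the per-set loops added to run_end2end.sh and run.sh above (and to run_textlocalization.sh further down): the bare line "data/local/splits/${set}.txt data/${set}" sits on its own after the "|| exit 1" that terminates the process_data.py command, so the shell will try to execute that path as a command, which will almost certainly fail and, with "set -e", abort the run. It appears to be a leftover fragment of an earlier form of the call and can simply be dropped, leaving the process_data.py invocation followed directly by image/fix_data_dir.sh.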
- cut -d' ' -f2- data/train/text | local/reverse.py | \ + cut -d' ' -f2- data/train/text | utilis/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | local/reverse.py | \ + cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text diff --git a/egs/madcat_ar/v1/local/reverse.py b/egs/wsj/s5/utils/lang/bpe/reverse.py similarity index 100% rename from egs/madcat_ar/v1/local/reverse.py rename to egs/wsj/s5/utils/lang/bpe/reverse.py From fb0b8a25363b443d3687d8446bc8a7c1565e4db4 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 19:09:45 -0400 Subject: [PATCH 31/67] removing prepare data --- egs/madcat_ar/v1/local/tl/prepare_data.sh | 49 ------------------- .../v1/local/tl/run_textlocalization.sh | 31 +++++++----- 2 files changed, 20 insertions(+), 60 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/tl/prepare_data.sh diff --git a/egs/madcat_ar/v1/local/tl/prepare_data.sh b/egs/madcat_ar/v1/local/tl/prepare_data.sh deleted file mode 100755 index 5fe41e7cf4c..00000000000 --- a/egs/madcat_ar/v1/local/tl/prepare_data.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2017 Hossein Hadian -# Apache 2.0 - -# This script prepares the training and test data for MADCAT Arabic dataset -# (i.e text, images.scp, utt2spk and spk2utt). It calls process_data.py. - -# Eg. local/prepare_data.sh -# Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 ﻮﺠﻫ ﻮﻌﻘﻟ ﻍﺍﺮﻗ ﺢﺗّﻯ ﺎﻠﻨﺧﺎﻋ -# utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 -# images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 -# data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif - -stage=0 -download_dir1=/export/corpora/LDC/LDC2012T15/data -download_dir2=/export/corpora/LDC/LDC2013T09/data -download_dir3=/export/corpora/LDC/LDC2013T15/data -writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab -writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab -writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab -data_splits_dir=data/download/data_splits -images_scp_dir=data/local - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh || exit 1; - -mkdir -p data/{train,test,dev} - -if [ $stage -le 1 ]; then - echo "$0: Processing dev, train and test data...$(date)" - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.dev.raw.lineid data/dev $images_scp_dir/dev/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 - - local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ - $data_splits_dir/madcat.train.raw.lineid data/train $images_scp_dir/train/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 - - local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test - - for dataset in dev test train; do - echo "$0: Fixing data directory for dataset: $dataset." 
- image/fix_data_dir.sh data/$dataset - done -fi diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 3211e93e120..cd5c96e368e 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -47,10 +47,16 @@ if [ $stage -le 0 ]; then done echo "$0: Preparing data..." - local/prepare_data.sh --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ - --download_dir3 $download_dir3 --images_scp_dir data/local \ - --data_splits_dir $data_splits_dir --writing_condition1 $writing_condition1 \ - --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 + for set in dev train; do + local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ + $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ + $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 + data/local/splits/${set}.txt data/${set} + image/fix_data_dir.sh data/${set} + done + + local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + image/fix_data_dir.sh data/test fi if [ $stage -le 1 ]; then @@ -77,20 +83,20 @@ fi if [ $stage -le 3 ]; then echo "$0: Preparing BPE..." - cut -d' ' -f2- data/train/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + cut -d' ' -f2- data/train/text | utilis/lang/bpe/reverse.py | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt for set in test train dev; do cut -d' ' -f1 data/$set/text > data/$set/ids - cut -d' ' -f2- data/$set/text | local/reverse.py | \ - utils/lang/bpe/prepend_words.py --encoding 'utf-8' | \ + cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ + utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ | sed 's/@@//g' > data/$set/bpe_text mv data/$set/text data/$set/text.old paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text - #rm -f data/$set/bpe_text data/$set/ids + rm -f data/$set/bpe_text data/$set/ids done echo "$0:Preparing dictionary and lang..." @@ -103,8 +109,10 @@ fi if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + data/local/dict/lexicon.txt data/lang + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ + data/lang data/lang_rescore_6g fi if [ $stage -le 4 ]; then @@ -115,6 +123,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Aligning the training data using the e2e chain model...$(date)." 
steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ + --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi From 7835ed4926f7fa56345487f5b25b56fb22ba7a45 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 19:16:30 -0400 Subject: [PATCH 32/67] adding augmentation during line image creation, removing unnecessary files --- .../create_line_image_from_page_image.py | 180 +++--- egs/madcat_ar/v1/local/process_data.py | 40 +- .../tl/create_line_image_from_page_image.py | 567 ------------------ egs/madcat_ar/v1/local/tl/process_data.py | 215 ------- 4 files changed, 139 insertions(+), 863 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py delete mode 100755 egs/madcat_ar/v1/local/tl/process_data.py diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index 34e339f1877..bb126c39538 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -21,22 +21,10 @@ import numpy as np from math import atan2, cos, sin, pi, degrees, sqrt from collections import namedtuple - +import random from scipy.spatial import ConvexHull from PIL import Image from scipy.misc import toimage -import logging - -sys.path.insert(0, 'steps') -logger = logging.getLogger('libs') -logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " - "%(funcName)s - %(levelname)s ] %(message)s") -handler.setFormatter(formatter) -logger.addHandler(handler) - parser = argparse.ArgumentParser(description="Creates line images from page image", epilog="E.g. " + sys.argv[0] + " data/LDC2012T15" " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " @@ -60,8 +48,12 @@ help='Path to the downloaded (and extracted) writing conditions file 3') parser.add_argument('--padding', type=int, default=400, help='padding across horizontal/verticle direction') +parser.add_argument('--pixel-scaling', type=int, default=30, + help='padding across horizontal/verticle direction') parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, help="only processes subset of data based on writing condition") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() """ @@ -196,21 +188,6 @@ def rectangle_corners(rectangle): return rotate_points(rectangle['rectangle_center'], rectangle['unit_vector_angle'], corner_points) -def get_orientation(origin, p1, p2): - """ - Given origin and two points, return the orientation of the Point p1 with - regards to Point p2 using origin. - Returns - ------- - integer: Negative if p1 is clockwise of p2. - """ - difference = ( - ((p2[0] - origin[0]) * (p1[1] - origin[1])) - - ((p1[0] - origin[0]) * (p2[1] - origin[1])) - ) - return difference - - def minimum_bounding_box(points): """ Given a list of 2D points, it returns the minimum area rectangle bounding all the points in the point cloud. @@ -357,6 +334,36 @@ def update_minimum_bounding_box_input(bounding_box_input): return updated_minimum_bounding_box_input +def dilate_polygon(points, amount_increase): + """ Increases size of polygon given as a list of tuples. 
+ Assumes points in polygon are given in CCW + """ + expanded_points = [] + for index, point in enumerate(points): + prev_point = points[(index - 1) % len(points)] + next_point = points[(index + 1) % len(points)] + prev_edge = np.subtract(point, prev_point) + next_edge = np.subtract(next_point, point) + + prev_normal = ((1 * prev_edge[1]), (-1 * prev_edge[0])) + prev_normal = np.divide(prev_normal, np.linalg.norm(prev_normal)) + next_normal = ((1 * next_edge[1]), (-1 * next_edge[0])) + next_normal = np.divide(next_normal, np.linalg.norm(next_normal)) + + bisect = np.add(prev_normal, next_normal) + bisect = np.divide(bisect, np.linalg.norm(bisect)) + + cos_theta = np.dot(next_normal, bisect) + hyp = amount_increase / cos_theta + + new_point = np.around(point + hyp * bisect) + new_point = new_point.astype(int) + new_point = new_point.tolist() + new_point = tuple(new_point) + expanded_points.append(new_point) + return expanded_points + + def set_line_image_data(image, line_id, image_file_name, image_fh): """ Given an image, saves a flipped line image. Line image file name is formed by appending the line id at the end page image name. @@ -395,50 +402,83 @@ def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh) word_coordinate = (int(word_node.getAttribute('x')), int(word_node.getAttribute('y'))) minimum_bounding_box_input.append(word_coordinate) updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) - bounding_box = minimum_bounding_box(updated_mbb_input) - - p1, p2, p3, p4 = bounding_box.corner_points - x1, y1 = p1 - x2, y2 = p2 - x3, y3 = p3 - x4, y4 = p4 - min_x = int(min(x1, x2, x3, x4)) - min_y = int(min(y1, y2, y3, y4)) - max_x = int(max(x1, x2, x3, x4)) - max_y = int(max(y1, y2, y3, y4)) - box = (min_x, min_y, max_x, max_y) - region_initial = im.crop(box) - rot_points = [] - p1_new = (x1 - min_x, y1 - min_y) - p2_new = (x2 - min_x, y2 - min_y) - p3_new = (x3 - min_x, y3 - min_y) - p4_new = (x4 - min_x, y4 - min_y) - rot_points.append(p1_new) - rot_points.append(p2_new) - rot_points.append(p3_new) - rot_points.append(p4_new) - - cropped_bounding_box = bounding_box_tuple(bounding_box.area, - bounding_box.length_parallel, - bounding_box.length_orthogonal, - bounding_box.length_orthogonal, - bounding_box.unit_vector, - bounding_box.unit_vector_angle, - set(rot_points) - ) - - rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) - img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) - x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] + if args.augment: + for i in range(0, 3): + additional_pixel = random.randint(1, args.pixel_scaling) + mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) + bounding_box = minimum_bounding_box(mar) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + 
bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( + cropped_bounding_box, get_center(region_initial)) + + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + line_id = id + '_scale' + str(i) + set_line_image_data(region_final, line_id, image_file_name, image_fh) + else: + bounding_box = minimum_bounding_box(points_ordered) + (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points + min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) + max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) + box = (min_x, min_y, max_x, max_y) + region_initial = im.crop(box) + rot_points = [] + p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) + p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) + rot_points.append(p1) + rot_points.append(p2) + rot_points.append(p3) + rot_points.append(p4) + + cropped_bounding_box = bounding_box_tuple(bounding_box.area, + bounding_box.length_parallel, + bounding_box.length_orthogonal, + bounding_box.length_orthogonal, + bounding_box.unit_vector, + bounding_box.unit_vector_angle, + set(rot_points) + ) + + rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) + img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) + x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( cropped_bounding_box, get_center(region_initial)) - min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - box = (min_x, min_y, max_x, max_y) - region_final = img2.crop(box) - set_line_image_data(region_final, id, image_file_name, image_fh) + min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) + max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) + box = (min_x, min_y, max_x, max_y) + region_final = img2.crop(box) + set_line_image_data(region_final, id, image_file_name, image_fh) def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): diff --git a/egs/madcat_ar/v1/local/process_data.py b/egs/madcat_ar/v1/local/process_data.py index 920cb6f700b..c21beb1be70 100755 --- a/egs/madcat_ar/v1/local/process_data.py +++ b/egs/madcat_ar/v1/local/process_data.py @@ -42,6 +42,8 @@ help='Path to the downloaded (and extracted) writing conditions file 2') parser.add_argument('writing_condition3', type=str, help='Path to the downloaded (and extracted) writing conditions file 3') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, help="only processes subset of data based on writing 
condition") args = parser.parse_args() @@ -184,14 +186,30 @@ def get_line_image_location(): writer_id = writer[0].getAttribute('id') text_line_word_dict = read_text(madcat_xml_path) base_name = os.path.basename(image_file_path).split('.tif')[0] - for lineID in sorted(text_line_word_dict): - updated_base_name = base_name + '_' + str(lineID).zfill(4) +'.png' - location = image_loc_dict[updated_base_name] - image_file_path = os.path.join(location, updated_base_name) - line = text_line_word_dict[lineID] - text = ' '.join(line) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(lineID).zfill(4) - text_fh.write(utt_id + ' ' + text + '\n') - utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') - image_fh.write(utt_id + ' ' + image_file_path + '\n') - image_num += 1 + for line_id in sorted(text_line_word_dict): + if args.augment: + key = (line_id + '.')[:-1] + for i in range(0, 3): + location_id = '_' + line_id + '_scale' + str(i) + line_image_file_name = base_name + location_id + '.png' + location = image_loc_dict[line_image_file_name] + image_file_path = os.path.join(location, line_image_file_name) + line = text_line_word_dict[key] + text = ' '.join(line) + base_line_image_file_name = line_image_file_name.split('.png')[0] + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_line_image_file_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 + else: + updated_base_name = base_name + '_' + str(line_id).zfill(4) +'.png' + location = image_loc_dict[updated_base_name] + image_file_path = os.path.join(location, updated_base_name) + line = text_line_word_dict[line_id] + text = ' '.join(line) + utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(line_id).zfill(4) + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') + image_num += 1 diff --git a/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py deleted file mode 100755 index bb126c39538..00000000000 --- a/egs/madcat_ar/v1/local/tl/create_line_image_from_page_image.py +++ /dev/null @@ -1,567 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2018 Ashish Arora -# Apache 2.0 -# minimum bounding box part in this script is originally from -#https://github.com/BebeSparkelSparkel/MinimumBoundingBox -#https://startupnextdoor.com/computing-convex-hull-in-python/ -""" This module will be used for extracting line images from page image. - Given the word segmentation (bounding box around a word) for every word, it will - extract line segmentation. To extract line segmentation, it will take word bounding - boxes of a line as input, will create a minimum area bounding box that will contain - all corner points of word bounding boxes. The obtained bounding box (will not necessarily - be vertically or horizontally aligned). Hence to extract line image from line bounding box, - page image is rotated and line image is cropped and saved. 
-""" - -import sys -import argparse -import os -import xml.dom.minidom as minidom -import numpy as np -from math import atan2, cos, sin, pi, degrees, sqrt -from collections import namedtuple -import random -from scipy.spatial import ConvexHull -from PIL import Image -from scipy.misc import toimage -parser = argparse.ArgumentParser(description="Creates line images from page image", - epilog="E.g. " + sys.argv[0] + " data/LDC2012T15" - " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " - " data/local/lines ", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('database_path1', type=str, - help='Path to the downloaded madcat data directory 1') -parser.add_argument('database_path2', type=str, - help='Path to the downloaded madcat data directory 2') -parser.add_argument('database_path3', type=str, - help='Path to the downloaded madcat data directory 3') -parser.add_argument('data_splits', type=str, - help='Path to file that contains the train/test/dev split information') -parser.add_argument('out_dir', type=str, - help='directory location to write output files') -parser.add_argument('writing_condition1', type=str, - help='Path to the downloaded (and extracted) writing conditions file 1') -parser.add_argument('writing_condition2', type=str, - help='Path to the downloaded (and extracted) writing conditions file 2') -parser.add_argument('writing_condition3', type=str, - help='Path to the downloaded (and extracted) writing conditions file 3') -parser.add_argument('--padding', type=int, default=400, - help='padding across horizontal/verticle direction') -parser.add_argument('--pixel-scaling', type=int, default=30, - help='padding across horizontal/verticle direction') -parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, - help="only processes subset of data based on writing condition") -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") -args = parser.parse_args() - -""" -bounding_box is a named tuple which contains: - area (float): area of the rectangle - length_parallel (float): length of the side that is parallel to unit_vector - length_orthogonal (float): length of the side that is orthogonal to unit_vector - rectangle_center(int, int): coordinates of the rectangle center - (use rectangle_corners to get the corner points of the rectangle) - unit_vector (float, float): direction of the length_parallel side. - (it's orthogonal vector can be found with the orthogonal_vector function - unit_vector_angle (float): angle of the unit vector to be in radians. - corner_points [(float, float)]: set that contains the corners of the rectangle -""" - -bounding_box_tuple = namedtuple('bounding_box_tuple', 'area ' - 'length_parallel ' - 'length_orthogonal ' - 'rectangle_center ' - 'unit_vector ' - 'unit_vector_angle ' - 'corner_points' - ) - - -def unit_vector(pt0, pt1): - """ Given two points pt0 and pt1, return a unit vector that - points in the direction of pt0 to pt1. - Returns - ------- - (float, float): unit vector - """ - dis_0_to_1 = sqrt((pt0[0] - pt1[0])**2 + (pt0[1] - pt1[1])**2) - return (pt1[0] - pt0[0]) / dis_0_to_1, \ - (pt1[1] - pt0[1]) / dis_0_to_1 - - -def orthogonal_vector(vector): - """ Given a vector, returns a orthogonal/perpendicular vector of equal length. - Returns - ------ - (float, float): A vector that points in the direction orthogonal to vector. 
- """ - return -1 * vector[1], vector[0] - - -def bounding_area(index, hull): - """ Given index location in an array and convex hull, it gets two points - hull[index] and hull[index+1]. From these two points, it returns a named - tuple that mainly contains area of the box that bounds the hull. This - bounding box orintation is same as the orientation of the lines formed - by the point hull[index] and hull[index+1]. - Returns - ------- - a named tuple that contains: - area: area of the rectangle - length_parallel: length of the side that is parallel to unit_vector - length_orthogonal: length of the side that is orthogonal to unit_vector - rectangle_center: coordinates of the rectangle center - unit_vector: direction of the length_parallel side. - (it's orthogonal vector can be found with the orthogonal_vector function) - """ - unit_vector_p = unit_vector(hull[index], hull[index+1]) - unit_vector_o = orthogonal_vector(unit_vector_p) - - dis_p = tuple(np.dot(unit_vector_p, pt) for pt in hull) - dis_o = tuple(np.dot(unit_vector_o, pt) for pt in hull) - - min_p = min(dis_p) - min_o = min(dis_o) - len_p = max(dis_p) - min_p - len_o = max(dis_o) - min_o - - return {'area': len_p * len_o, - 'length_parallel': len_p, - 'length_orthogonal': len_o, - 'rectangle_center': (min_p + len_p / 2, min_o + len_o / 2), - 'unit_vector': unit_vector_p, - } - - -def to_xy_coordinates(unit_vector_angle, point): - """ Given angle from horizontal axis and a point from origin, - returns converted unit vector coordinates in x, y coordinates. - angle of unit vector should be in radians. - Returns - ------ - (float, float): converted x,y coordinate of the unit vector. - """ - angle_orthogonal = unit_vector_angle + pi / 2 - return point[0] * cos(unit_vector_angle) + point[1] * cos(angle_orthogonal), \ - point[0] * sin(unit_vector_angle) + point[1] * sin(angle_orthogonal) - - -def rotate_points(center_of_rotation, angle, points): - """ Rotates a point cloud around the center_of_rotation point by angle - input - ----- - center_of_rotation (float, float): angle of unit vector to be in radians. - angle (float): angle of rotation to be in radians. - points [(float, float)]: Points to be a list or tuple of points. Points to be rotated. - Returns - ------ - [(float, float)]: Rotated points around center of rotation by angle - """ - rot_points = [] - ang = [] - for pt in points: - diff = tuple([pt[d] - center_of_rotation[d] for d in range(2)]) - diff_angle = atan2(diff[1], diff[0]) + angle - ang.append(diff_angle) - diff_length = sqrt(sum([d**2 for d in diff])) - rot_points.append((center_of_rotation[0] + diff_length * cos(diff_angle), - center_of_rotation[1] + diff_length * sin(diff_angle))) - - return rot_points - - -def rectangle_corners(rectangle): - """ Given rectangle center and its inclination, returns the corner - locations of the rectangle. - Returns - ------ - [(float, float)]: 4 corner points of rectangle. - """ - corner_points = [] - for i1 in (.5, -.5): - for i2 in (i1, -1 * i1): - corner_points.append((rectangle['rectangle_center'][0] + i1 * rectangle['length_parallel'], - rectangle['rectangle_center'][1] + i2 * rectangle['length_orthogonal'])) - - return rotate_points(rectangle['rectangle_center'], rectangle['unit_vector_angle'], corner_points) - - -def minimum_bounding_box(points): - """ Given a list of 2D points, it returns the minimum area rectangle bounding all - the points in the point cloud. 
- Returns - ------ - returns a namedtuple that contains: - area: area of the rectangle - length_parallel: length of the side that is parallel to unit_vector - length_orthogonal: length of the side that is orthogonal to unit_vector - rectangle_center: coordinates of the rectangle center - unit_vector: direction of the length_parallel side. RADIANS - unit_vector_angle: angle of the unit vector - corner_points: set that contains the corners of the rectangle - """ - - if len(points) <= 2: raise ValueError('More than two points required.') - - hull_ordered = [points[index] for index in ConvexHull(points).vertices] - hull_ordered.append(hull_ordered[0]) - hull_ordered = tuple(hull_ordered) - - min_rectangle = bounding_area(0, hull_ordered) - for i in range(1, len(hull_ordered)-1): - rectangle = bounding_area(i, hull_ordered) - if rectangle['area'] < min_rectangle['area']: - min_rectangle = rectangle - - min_rectangle['unit_vector_angle'] = atan2(min_rectangle['unit_vector'][1], min_rectangle['unit_vector'][0]) - min_rectangle['rectangle_center'] = to_xy_coordinates(min_rectangle['unit_vector_angle'], min_rectangle['rectangle_center']) - - return bounding_box_tuple( - area = min_rectangle['area'], - length_parallel = min_rectangle['length_parallel'], - length_orthogonal = min_rectangle['length_orthogonal'], - rectangle_center = min_rectangle['rectangle_center'], - unit_vector = min_rectangle['unit_vector'], - unit_vector_angle = min_rectangle['unit_vector_angle'], - corner_points = set(rectangle_corners(min_rectangle)) - ) - - -def get_center(im): - """ Given image, returns the location of center pixel - Returns - ------- - (int, int): center of the image - """ - center_x = im.size[0] / 2 - center_y = im.size[1] / 2 - return int(center_x), int(center_y) - - -def get_horizontal_angle(unit_vector_angle): - """ Given an angle in radians, returns angle of the unit vector in - first or fourth quadrant. - Returns - ------ - (float): updated angle of the unit vector to be in radians. - It is only in first or fourth quadrant. - """ - if unit_vector_angle > pi / 2 and unit_vector_angle <= pi: - unit_vector_angle = unit_vector_angle - pi - elif unit_vector_angle > -pi and unit_vector_angle < -pi / 2: - unit_vector_angle = unit_vector_angle + pi - - return unit_vector_angle - - -def get_smaller_angle(bounding_box): - """ Given a rectangle, returns its smallest absolute angle from horizontal axis. - Returns - ------ - (float): smallest angle of the rectangle to be in radians. - """ - unit_vector = bounding_box.unit_vector - unit_vector_angle = bounding_box.unit_vector_angle - ortho_vector = orthogonal_vector(unit_vector) - ortho_vector_angle = atan2(ortho_vector[1], ortho_vector[0]) - - unit_vector_angle_updated = get_horizontal_angle(unit_vector_angle) - ortho_vector_angle_updated = get_horizontal_angle(ortho_vector_angle) - - if abs(unit_vector_angle_updated) < abs(ortho_vector_angle_updated): - return unit_vector_angle_updated - else: - return ortho_vector_angle_updated - - -def rotated_points(bounding_box, center): - """ Given the rectangle, returns corner points of rotated rectangle. - It rotates the rectangle around the center by its smallest angle. - Returns - ------- - [(int, int)]: 4 corner points of rectangle. 
- """ - p1, p2, p3, p4 = bounding_box.corner_points - x1, y1 = p1 - x2, y2 = p2 - x3, y3 = p3 - x4, y4 = p4 - center_x, center_y = center - rotation_angle_in_rad = -get_smaller_angle(bounding_box) - x_dash_1 = (x1 - center_x) * cos(rotation_angle_in_rad) - (y1 - center_y) * sin(rotation_angle_in_rad) + center_x - x_dash_2 = (x2 - center_x) * cos(rotation_angle_in_rad) - (y2 - center_y) * sin(rotation_angle_in_rad) + center_x - x_dash_3 = (x3 - center_x) * cos(rotation_angle_in_rad) - (y3 - center_y) * sin(rotation_angle_in_rad) + center_x - x_dash_4 = (x4 - center_x) * cos(rotation_angle_in_rad) - (y4 - center_y) * sin(rotation_angle_in_rad) + center_x - - y_dash_1 = (y1 - center_y) * cos(rotation_angle_in_rad) + (x1 - center_x) * sin(rotation_angle_in_rad) + center_y - y_dash_2 = (y2 - center_y) * cos(rotation_angle_in_rad) + (x2 - center_x) * sin(rotation_angle_in_rad) + center_y - y_dash_3 = (y3 - center_y) * cos(rotation_angle_in_rad) + (x3 - center_x) * sin(rotation_angle_in_rad) + center_y - y_dash_4 = (y4 - center_y) * cos(rotation_angle_in_rad) + (x4 - center_x) * sin(rotation_angle_in_rad) + center_y - return x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 - - -def pad_image(image): - """ Given an image, returns a padded image around the border. - This routine save the code from crashing if bounding boxes that are - slightly outside the page boundary. - Returns - ------- - image: page image - """ - offset = int(args.padding // 2) - padded_image = Image.new('RGB', (image.size[0] + int(args.padding), image.size[1] + int(args.padding)), "white") - padded_image.paste(im = image, box = (offset, offset)) - return padded_image - - -def update_minimum_bounding_box_input(bounding_box_input): - """ Given list of 2D points, returns list of 2D points shifted by an offset. - Returns - ------ - points [(float, float)]: points, a list or tuple of 2D coordinates - """ - updated_minimum_bounding_box_input = [] - offset = int(args.padding // 2) - for point in bounding_box_input: - x, y = point - new_x = x + offset - new_y = y + offset - word_coordinate = (new_x, new_y) - updated_minimum_bounding_box_input.append(word_coordinate) - - return updated_minimum_bounding_box_input - - -def dilate_polygon(points, amount_increase): - """ Increases size of polygon given as a list of tuples. - Assumes points in polygon are given in CCW - """ - expanded_points = [] - for index, point in enumerate(points): - prev_point = points[(index - 1) % len(points)] - next_point = points[(index + 1) % len(points)] - prev_edge = np.subtract(point, prev_point) - next_edge = np.subtract(next_point, point) - - prev_normal = ((1 * prev_edge[1]), (-1 * prev_edge[0])) - prev_normal = np.divide(prev_normal, np.linalg.norm(prev_normal)) - next_normal = ((1 * next_edge[1]), (-1 * next_edge[0])) - next_normal = np.divide(next_normal, np.linalg.norm(next_normal)) - - bisect = np.add(prev_normal, next_normal) - bisect = np.divide(bisect, np.linalg.norm(bisect)) - - cos_theta = np.dot(next_normal, bisect) - hyp = amount_increase / cos_theta - - new_point = np.around(point + hyp * bisect) - new_point = new_point.astype(int) - new_point = new_point.tolist() - new_point = tuple(new_point) - expanded_points.append(new_point) - return expanded_points - - -def set_line_image_data(image, line_id, image_file_name, image_fh): - """ Given an image, saves a flipped line image. Line image file name - is formed by appending the line id at the end page image name. 
- """ - - base_name = os.path.splitext(os.path.basename(image_file_name))[0] - line_id = '_' + line_id.zfill(4) - line_image_file_name = base_name + line_id + '.png' - image_path = os.path.join(args.out_dir, line_image_file_name) - imgray = image.convert('L') - imgray_rev_arr = np.fliplr(imgray) - imgray_rev = toimage(imgray_rev_arr) - imgray_rev.save(image_path) - image_fh.write(image_path + '\n') - - -def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh): - """ Given a page image, extracts the line images from it. - Input - ----- - image_file_name (string): complete path and name of the page image. - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - """ - im_wo_pad = Image.open(image_file_name) - im = pad_image(im_wo_pad) - doc = minidom.parse(madcat_file_path) - zone = doc.getElementsByTagName('zone') - for node in zone: - id = node.getAttribute('id') - token_image = node.getElementsByTagName('token-image') - minimum_bounding_box_input = [] - for token_node in token_image: - word_point = token_node.getElementsByTagName('point') - for word_node in word_point: - word_coordinate = (int(word_node.getAttribute('x')), int(word_node.getAttribute('y'))) - minimum_bounding_box_input.append(word_coordinate) - updated_mbb_input = update_minimum_bounding_box_input(minimum_bounding_box_input) - points_ordered = [updated_mbb_input[index] for index in ConvexHull(updated_mbb_input).vertices] - if args.augment: - for i in range(0, 3): - additional_pixel = random.randint(1, args.pixel_scaling) - mar = dilate_polygon(points_ordered, (i-1)*args.pixel_scaling + additional_pixel + 1) - bounding_box = minimum_bounding_box(mar) - (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points - min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) - max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) - box = (min_x, min_y, max_x, max_y) - region_initial = im.crop(box) - rot_points = [] - p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) - p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) - rot_points.append(p1) - rot_points.append(p2) - rot_points.append(p3) - rot_points.append(p4) - - cropped_bounding_box = bounding_box_tuple(bounding_box.area, - bounding_box.length_parallel, - bounding_box.length_orthogonal, - bounding_box.length_orthogonal, - bounding_box.unit_vector, - bounding_box.unit_vector_angle, - set(rot_points) - ) - - rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) - img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) - x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( - cropped_bounding_box, get_center(region_initial)) - - min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - box = (min_x, min_y, max_x, max_y) - region_final = img2.crop(box) - line_id = id + '_scale' + str(i) - set_line_image_data(region_final, line_id, image_file_name, image_fh) - else: - bounding_box = minimum_bounding_box(points_ordered) - (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bounding_box.corner_points - min_x, min_y = int(min(x1, x2, x3, x4)), int(min(y1, y2, y3, y4)) - max_x, max_y = int(max(x1, x2, x3, x4)), int(max(y1, y2, y3, y4)) - box = (min_x, min_y, max_x, max_y) - region_initial = im.crop(box) - 
rot_points = [] - p1, p2 = (x1 - min_x, y1 - min_y), (x2 - min_x, y2 - min_y) - p3, p4 = (x3 - min_x, y3 - min_y), (x4 - min_x, y4 - min_y) - rot_points.append(p1) - rot_points.append(p2) - rot_points.append(p3) - rot_points.append(p4) - - cropped_bounding_box = bounding_box_tuple(bounding_box.area, - bounding_box.length_parallel, - bounding_box.length_orthogonal, - bounding_box.length_orthogonal, - bounding_box.unit_vector, - bounding_box.unit_vector_angle, - set(rot_points) - ) - - rotation_angle_in_rad = get_smaller_angle(cropped_bounding_box) - img2 = region_initial.rotate(degrees(rotation_angle_in_rad), resample = Image.BICUBIC) - x_dash_1, y_dash_1, x_dash_2, y_dash_2, x_dash_3, y_dash_3, x_dash_4, y_dash_4 = rotated_points( - cropped_bounding_box, get_center(region_initial)) - - min_x = int(min(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - min_y = int(min(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - max_x = int(max(x_dash_1, x_dash_2, x_dash_3, x_dash_4)) - max_y = int(max(y_dash_1, y_dash_2, y_dash_3, y_dash_4)) - box = (min_x, min_y, max_x, max_y) - region_final = img2.crop(box) - set_line_image_data(region_final, id, image_file_name, image_fh) - - -def check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3): - """ Returns the complete path of the page image and corresponding - xml file. - Returns - ------- - image_file_name (string): complete path and name of the page image. - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - """ - madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') - madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') - madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') - - image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') - image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') - image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') - - if os.path.exists(madcat_file_path1): - return madcat_file_path1, image_file_path1, wc_dict1 - - if os.path.exists(madcat_file_path2): - return madcat_file_path2, image_file_path2, wc_dict2 - - if os.path.exists(madcat_file_path3): - return madcat_file_path3, image_file_path3, wc_dict3 - - return None, None, None - - -def parse_writing_conditions(writing_conditions): - """ Given writing condition file path, returns a dictionary which have writing condition - of each page image. - Returns - ------ - (dict): dictionary with key as page image name and value as writing condition. - """ - with open(writing_conditions) as f: - file_writing_cond = dict() - for line in f: - line_list = line.strip().split("\t") - file_writing_cond[line_list[0]] = line_list[3] - return file_writing_cond - - -def check_writing_condition(wc_dict, base_name): - """ Given writing condition dictionary, checks if a page image is writing - in a specifed writing condition. - It is used to create subset of dataset based on writing condition. - Returns - (bool): True if writing condition matches. 
- """ - if args.subset: - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False - else: - return True - -### main ### -def main(): - - wc_dict1 = parse_writing_conditions(args.writing_condition1) - wc_dict2 = parse_writing_conditions(args.writing_condition2) - wc_dict3 = parse_writing_conditions(args.writing_condition3) - output_directory = args.out_dir - image_file = os.path.join(output_directory, 'images.scp') - image_fh = open(image_file, 'w', encoding='utf-8') - - splits_handle = open(args.data_splits, 'r') - splits_data = splits_handle.read().strip().split('\n') - prev_base_name = '' - for line in splits_data: - base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0] - if prev_base_name != base_name: - prev_base_name = base_name - madcat_file_path, image_file_path, wc_dict = check_file_location(base_name, wc_dict1, wc_dict2, wc_dict3) - if wc_dict is None or not check_writing_condition(wc_dict, base_name): - continue - get_line_images_from_page_image(image_file_path, madcat_file_path, image_fh) - - -if __name__ == '__main__': - main() - diff --git a/egs/madcat_ar/v1/local/tl/process_data.py b/egs/madcat_ar/v1/local/tl/process_data.py deleted file mode 100755 index c21beb1be70..00000000000 --- a/egs/madcat_ar/v1/local/tl/process_data.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2018 Ashish Arora - -""" This script reads MADCAT files and creates the following files (for the - data subset selected via --dataset) :text, utt2spk, images.scp. - Eg. local/process_data.py data/local /export/corpora/LDC/LDC2012T15 /export/corpora/LDC/LDC2013T09 - /export/corpora/LDC/LDC2013T15 data/download/data_splits/madcat.train.raw.lineid - data/dev data/local/lines/images.scp - Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 وجه وعقل غارق حتّى النخاع - utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001 - images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0 - data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif -""" - -import argparse -import os -import sys -import xml.dom.minidom as minidom -import unicodedata - -parser = argparse.ArgumentParser(description="Creates text, utt2spk and images.scp files", - epilog="E.g. 
" + sys.argv[0] + " data/LDC2012T15" - " data/LDC2013T09 data/LDC2013T15 data/madcat.train.raw.lineid " - " data/train data/local/lines ", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('database_path1', type=str, - help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('database_path2', type=str, - help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('database_path3', type=str, - help='Path to the downloaded (and extracted) madcat data') -parser.add_argument('data_splits', type=str, - help='Path to file that contains the train/test/dev split information') -parser.add_argument('out_dir', type=str, - help='directory location to write output files.') -parser.add_argument('images_scp_path', type=str, - help='Path of input images.scp file(maps line image and location)') -parser.add_argument('writing_condition1', type=str, - help='Path to the downloaded (and extracted) writing conditions file 1') -parser.add_argument('writing_condition2', type=str, - help='Path to the downloaded (and extracted) writing conditions file 2') -parser.add_argument('writing_condition3', type=str, - help='Path to the downloaded (and extracted) writing conditions file 3') -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") -parser.add_argument("--subset", type=lambda x: (str(x).lower()=='true'), default=False, - help="only processes subset of data based on writing condition") -args = parser.parse_args() - - -def check_file_location(): - """ Returns the complete path of the page image and corresponding - xml file. - Args: - Returns: - image_file_name (string): complete path and name of the page image. - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - """ - madcat_file_path1 = os.path.join(args.database_path1, 'madcat', base_name + '.madcat.xml') - madcat_file_path2 = os.path.join(args.database_path2, 'madcat', base_name + '.madcat.xml') - madcat_file_path3 = os.path.join(args.database_path3, 'madcat', base_name + '.madcat.xml') - - image_file_path1 = os.path.join(args.database_path1, 'images', base_name + '.tif') - image_file_path2 = os.path.join(args.database_path2, 'images', base_name + '.tif') - image_file_path3 = os.path.join(args.database_path3, 'images', base_name + '.tif') - - if os.path.exists(madcat_file_path1): - return madcat_file_path1, image_file_path1, wc_dict1 - - if os.path.exists(madcat_file_path2): - return madcat_file_path2, image_file_path2, wc_dict2 - - if os.path.exists(madcat_file_path3): - return madcat_file_path3, image_file_path3, wc_dict3 - - return None, None, None - - -def parse_writing_conditions(writing_conditions): - """ Returns a dictionary which have writing condition of each page image. - Args: - writing_conditions(string): complete path of writing condition file. - Returns: - (dict): dictionary with key as page image name and value as writing condition. - """ - with open(writing_conditions) as f: - file_writing_cond = dict() - for line in f: - line_list = line.strip().split("\t") - file_writing_cond[line_list[0]] = line_list[3] - return file_writing_cond - - -def check_writing_condition(wc_dict): - """ Checks if a given page image is writing in a given writing condition. - It is used to create subset of dataset based on writing condition. - Args: - wc_dict (dict): dictionary with key as page image name and value as writing condition. 
- Returns: - (bool): True if writing condition matches. - """ - if args.subset: - writing_condition = wc_dict[base_name].strip() - if writing_condition != 'IUC': - return False - else: - return True - - -def read_text(madcat_file_path): - """ Maps every word in the page image to a corresponding line. - Args: - madcat_file_path (string): complete path and name of the madcat xml file - corresponding to the page image. - Returns: - dict: Mapping every word in the page image to a corresponding line. - """ - - word_line_dict = dict() - doc = minidom.parse(madcat_file_path) - zone = doc.getElementsByTagName('zone') - for node in zone: - line_id = node.getAttribute('id') - word_image = node.getElementsByTagName('token-image') - for tnode in word_image: - word_id = tnode.getAttribute('id') - word_line_dict[word_id] = line_id - - text_line_word_dict = dict() - segment = doc.getElementsByTagName('segment') - for node in segment: - token = node.getElementsByTagName('token') - for tnode in token: - ref_word_id = tnode.getAttribute('ref_id') - word = tnode.getElementsByTagName('source')[0].firstChild.nodeValue - ref_line_id = word_line_dict[ref_word_id] - if ref_line_id not in text_line_word_dict: - text_line_word_dict[ref_line_id] = list() - text_line_word_dict[ref_line_id].append(word) - return text_line_word_dict - - -def get_line_image_location(): - image_loc_dict = dict() # Stores image base name and location - image_loc_vect = input_image_fh.read().strip().split("\n") - for line in image_loc_vect: - base_name = os.path.basename(line) - location_vect = line.split('/') - location = "/".join(location_vect[:-1]) - image_loc_dict[base_name]=location - return image_loc_dict - - -### main ### -print("Processing '{}' data...".format(args.out_dir)) - -text_file = os.path.join(args.out_dir, 'text') -text_fh = open(text_file, 'w', encoding='utf-8') -utt2spk_file = os.path.join(args.out_dir, 'utt2spk') -utt2spk_fh = open(utt2spk_file, 'w', encoding='utf-8') -image_file = os.path.join(args.out_dir, 'images.scp') -image_fh = open(image_file, 'w', encoding='utf-8') - -input_image_file = args.images_scp_path -input_image_fh = open(input_image_file, 'r', encoding='utf-8') - -wc_dict1 = parse_writing_conditions(args.writing_condition1) -wc_dict2 = parse_writing_conditions(args.writing_condition2) -wc_dict3 = parse_writing_conditions(args.writing_condition3) -image_loc_dict = get_line_image_location() - -image_num = 0 -with open(args.data_splits) as f: - prev_base_name = '' - for line in f: - base_name = os.path.splitext(os.path.splitext(line.split(' ')[0])[0])[0] - if prev_base_name != base_name: - prev_base_name = base_name - madcat_xml_path, image_file_path, wc_dict = check_file_location() - if wc_dict is None or not check_writing_condition(wc_dict): - continue - madcat_doc = minidom.parse(madcat_xml_path) - writer = madcat_doc.getElementsByTagName('writer') - writer_id = writer[0].getAttribute('id') - text_line_word_dict = read_text(madcat_xml_path) - base_name = os.path.basename(image_file_path).split('.tif')[0] - for line_id in sorted(text_line_word_dict): - if args.augment: - key = (line_id + '.')[:-1] - for i in range(0, 3): - location_id = '_' + line_id + '_scale' + str(i) - line_image_file_name = base_name + location_id + '.png' - location = image_loc_dict[line_image_file_name] - image_file_path = os.path.join(location, line_image_file_name) - line = text_line_word_dict[key] - text = ' '.join(line) - base_line_image_file_name = line_image_file_name.split('.png')[0] - utt_id = writer_id + '_' + 
str(image_num).zfill(6) + '_' + base_line_image_file_name - text_fh.write(utt_id + ' ' + text + '\n') - utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') - image_fh.write(utt_id + ' ' + image_file_path + '\n') - image_num += 1 - else: - updated_base_name = base_name + '_' + str(line_id).zfill(4) +'.png' - location = image_loc_dict[updated_base_name] - image_file_path = os.path.join(location, updated_base_name) - line = text_line_word_dict[line_id] - text = ' '.join(line) - utt_id = writer_id + '_' + str(image_num).zfill(6) + '_' + base_name + '_' + str(line_id).zfill(4) - text_fh.write(utt_id + ' ' + text + '\n') - utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') - image_fh.write(utt_id + ' ' + image_file_path + '\n') - image_num += 1 From 0234a1aabbb4242dc8dfc9364c8fe0e8dea2aa68 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 19:34:54 -0400 Subject: [PATCH 33/67] adding chain recepi --- .../v1/local/tl/chain/run_cnn_e2eali.sh | 244 ++++++++++++++++++ .../v1/local/tl/chain/run_e2e_cnn.sh | 172 ++++++++++++ .../v1/local/tl/run_textlocalization.sh | 4 +- 3 files changed, 418 insertions(+), 2 deletions(-) create mode 100755 egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh create mode 100755 egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh new file mode 100755 index 00000000000..3f2a0dd6e37 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -0,0 +1,244 @@ +#!/bin/bash + +# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the +# lattice alignments and to build a tree + +# local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b +# System cnn_e2eali_1b +# WER 10.78 +# CER 2.99 +# Final train prob -0.0587 +# Final valid prob -0.0609 +# Final train prob (xent) -0.4471 +# Final valid prob (xent) -0.4653 +# Parameters 3.37M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b +#exp/chain/cnn_e2eali_1b: num-iters=179 nj=8..16 num-params=3.4M dim=40->416 combine=-0.058->-0.058 (over 3) xent:train/valid[118,178,final]=(-0.463,-0.445,-0.447/-0.477,-0.462,-0.465) logprob:train/valid[118,178,final]=(-0.062,-0.059,-0.059/-0.063,-0.061,-0.061) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=450 +# training options +srand=0 +remove_egs=true +lang_test=lang_test +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts + +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. 
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=56 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=2 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=16 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi + diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh new file mode 100755 index 00000000000..d43c1f1a0f3 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# Copyright 2017 Hossein Hadian + +# This script does end2end chain training (i.e. 
from scratch) + +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# System e2e_cnn_1a +# WER 10.71 +# CER 2.85 +# Final train prob -0.0859 +# Final valid prob -0.1266 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 2.94M + +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ +# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) + +set -e + + +# configs for 'chain' +stage=0 +nj=30 +train_stage=-10 +get_egs_stage=-10 +affix=1a + +# training options +tdnn_dim=450 +num_epochs=2 +num_jobs_initial=3 +num_jobs_final=16 +minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 +common_egs_dir= +l2_regularize=0.00005 +frames_per_iter=1000000 +cmvn_opts="--norm-means=false --norm-vars=false" +train_set=train +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 1 ]; then + steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + --shared-phones true \ + --type mono \ + data/$train_set $lang $treedir + $cmd $treedir/log/make_phone_lm.log \ + cat data/$train_set/text \| \ + steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \ + utils/sym2int.pl -f 2- data/lang/phones.txt \| \ + chain-est-phone-lm --num-extra-lm-states=500 \ + ark:- $treedir/phone_lm.fst +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + common1="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=56 name=input + conv-relu-batchnorm-layer name=cnn1 height-in=56 height-out=56 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=56 height-out=28 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=28 height-out=28 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=28 height-out=14 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=14 height-out=14 time-offsets=-4,0,4 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs +fi + +if [ $stage -le 3 ]; then + # no need to store the egs in a shared storage because we always + # remove them. Anyway, it takes only 5 minutes to generate them. 
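+  # train_e2e.py performs flat-start LF-MMI training: the phone LM estimated
+  # in stage 1 ($treedir/phone_lm.fst) supplies the denominator graph, so no
+  # GMM/HMM alignments are needed, and with frame-subsampling-factor 4 the
+  # network emits outputs at a quarter of the input feature frame rate.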
+ + steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ + --cmd "$cmd" \ + --feat.cmvn-opts "$cmvn_opts" \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize $l2_regularize \ + --chain.apply-deriv-weights false \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ + --chain.frame-subsampling-factor 4 \ + --chain.alignment-subsampling-factor 4 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter $frames_per_iter \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.momentum 0 \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.shrink-value 1.0 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir data/${train_set} \ + --tree-dir $treedir \ + --dir $dir || exit 1; +fi + +if [ $stage -le 4 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 5 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 +fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index cd5c96e368e..7a49e2ebb76 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -117,7 +117,7 @@ fi if [ $stage -le 4 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_flatstart_cnn1a.sh --nj $nj + local/chain/run_e2e_cnn.sh --nj $nj fi if [ $stage -le 5 ]; then @@ -130,5 +130,5 @@ fi if [ $stage -le 6 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali_1b.sh --nj $nj + local/chain/run_cnn_e2eali.sh --nj $nj fi From a17fbb3e260e6957dcc7b0854aee1939932ef833 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 19:43:17 -0400 Subject: [PATCH 34/67] minor fix --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 7a49e2ebb76..d4db24ce0cb 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -117,7 +117,7 @@ fi if [ $stage -le 4 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." 
- local/chain/run_e2e_cnn.sh --nj $nj + local/tl/chain/run_e2e_cnn.sh --nj $nj fi if [ $stage -le 5 ]; then @@ -130,5 +130,5 @@ fi if [ $stage -le 6 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali.sh --nj $nj + local/tl/chain/run_cnn_e2eali.sh --nj $nj fi From 59c84f2428569ad884a2407c2427401e0b11ca5a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 19:48:42 -0400 Subject: [PATCH 35/67] bug fix --- egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index 3f2a0dd6e37..3e03473faef 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -41,7 +41,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=true -lang_test=lang_test +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -226,7 +227,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi @@ -240,5 +241,10 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test $dir/decode_test{,_rescored} || exit 1 fi +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir From a23b478a269e0f9ac3be4e6a7040aa8848e21493 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 21:57:52 -0400 Subject: [PATCH 36/67] fixing bugs --- egs/madcat_ar/v1/local/extract_lines.sh | 4 +++- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 14 +++++++------- egs/madcat_ar/v1/run_end2end.sh | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/egs/madcat_ar/v1/local/extract_lines.sh b/egs/madcat_ar/v1/local/extract_lines.sh index 50129ad38c9..ab87836ae3a 100755 --- a/egs/madcat_ar/v1/local/extract_lines.sh +++ b/egs/madcat_ar/v1/local/extract_lines.sh @@ -11,6 +11,8 @@ writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_split_file=data/download/data_splits/madcat.dev.raw.lineid data=data/local/dev +subset=false +augment=false echo "$0 $@" . ./cmd.sh @@ -35,7 +37,7 @@ done $cmd JOB=1:$nj $log_dir/extract_lines.JOB.log \ local/create_line_image_from_page_image.py $download_dir1 $download_dir2 $download_dir3 \ $log_dir/lines.JOB.scp $data/JOB $writing_condition1 $writing_condition2 $writing_condition3 \ - || exit 1; + --subset $subset --augment $augment || exit 1; ## concatenate the .scp files together. for n in $(seq $nj); do diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index d4db24ce0cb..845986224d1 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -16,7 +16,8 @@ writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits overwrite=false - +subset=true +augment=true . 
./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -37,21 +38,21 @@ if [ $stage -le 0 ]; then local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 - for dataset in train dev; do - data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + for set in train dev; do + data_split_file=$data_splits_dir/madcat.$set.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset + --data data/local/$set --subset $subset --augment $augment || exit 1 done echo "$0: Preparing data..." for set in dev train; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 --augment true || exit 1 - data/local/splits/${set}.txt data/${set} + $writing_condition1 $writing_condition2 $writing_condition3 \ + data/local/splits/${set}.txt data/${set} --augment $augment --subset $subset || exit 1 image/fix_data_dir.sh data/${set} done @@ -70,7 +71,6 @@ if [ $stage -le 1 ]; then done echo "$0: Fixing data directory for train dataset $(date)." image/fix_data_dir.sh data/train - fi if [ $stage -le 2 ]; then diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 6ab6e8ff32d..0e9be93be61 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -43,15 +43,15 @@ if [ $stage -le 0 ]; then --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset + --data data/local/$dataset || exit 1 done echo "$0: Preparing data..." 
for set in dev train test; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 - data/local/splits/${set}.txt data/${set} + $writing_condition1 $writing_condition2 $writing_condition3 \ + data/local/splits/${set}.txt data/${set} || exit 1 image/fix_data_dir.sh data/${set} done From a3aac1abdca709f44973b3ba85cc08f88cab8b40 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 22:01:26 -0400 Subject: [PATCH 37/67] fixing bugs --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 3 +-- egs/madcat_ar/v1/run_end2end.sh | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 845986224d1..340dcd71fb2 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -51,8 +51,7 @@ if [ $stage -le 0 ]; then for set in dev train; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 \ - data/local/splits/${set}.txt data/${set} --augment $augment --subset $subset || exit 1 + $writing_condition1 $writing_condition2 $writing_condition3 --augment $augment --subset $subset image/fix_data_dir.sh data/${set} done diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 0e9be93be61..342b0d69597 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -50,8 +50,7 @@ if [ $stage -le 0 ]; then for set in dev train test; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 \ - data/local/splits/${set}.txt data/${set} || exit 1 + $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 image/fix_data_dir.sh data/${set} done From 8fc860d9fe4f2c2c15ef126e5bce4456dd8fa613 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 19 Sep 2018 22:11:10 -0400 Subject: [PATCH 38/67] bug fix --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 4 +++- egs/madcat_ar/v1/run_end2end.sh | 12 +++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 340dcd71fb2..c725871c964 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -3,7 +3,7 @@ # 2018 Ashish Arora set -e stage=0 -nj=30 +nj=70 # download_dir{1,2,3} points to the database path on the JHU grid. If you have not # already downloaded the database you can set it to a local directory # This corpus can be purchased here: @@ -15,6 +15,7 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits +images_scp_dir=data/local overwrite=false subset=true augment=true @@ -114,6 +115,7 @@ if [ $stage -le 3 ]; then data/lang data/lang_rescore_6g fi +nj=30 if [ $stage -le 4 ]; then echo "$0: Calling the flat-start chain recipe... 
$(date)." local/tl/chain/run_e2e_cnn.sh --nj $nj diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 342b0d69597..a5496a503be 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -15,8 +15,10 @@ writing_condition1=/export/corpora/LDC/LDC2012T15/docs/writing_conditions.tab writing_condition2=/export/corpora/LDC/LDC2013T09/docs/writing_conditions.tab writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits +images_scp_dir=data/local overwrite=false - +subset=true +augment=true . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -37,20 +39,20 @@ if [ $stage -le 0 ]; then local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 - for dataset in test train dev; do - data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + for set in test train dev; do + data_split_file=$data_splits_dir/madcat.$set.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset || exit 1 + --data data/local/$set --subset $subset --augment $augment || exit 1 done echo "$0: Preparing data..." for set in dev train test; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 + $writing_condition1 $writing_condition2 $writing_condition3 --augment $augment --subset $subset image/fix_data_dir.sh data/${set} done From cafd89ad9352e20a3c43c915a9f42542b7314a76 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 02:19:43 -0400 Subject: [PATCH 39/67] fixing bug in subset --- egs/madcat_ar/v1/local/create_line_image_from_page_image.py | 2 ++ egs/madcat_ar/v1/local/process_data.py | 2 ++ egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index bb126c39538..778555c427e 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -536,6 +536,8 @@ def check_writing_condition(wc_dict, base_name): writing_condition = wc_dict[base_name].strip() if writing_condition != 'IUC': return False + else: + return True else: return True diff --git a/egs/madcat_ar/v1/local/process_data.py b/egs/madcat_ar/v1/local/process_data.py index c21beb1be70..e476b67cb96 100755 --- a/egs/madcat_ar/v1/local/process_data.py +++ b/egs/madcat_ar/v1/local/process_data.py @@ -105,6 +105,8 @@ def check_writing_condition(wc_dict): writing_condition = wc_dict[base_name].strip() if writing_condition != 'IUC': return False + else: + return True else: return True diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index c725871c964..473f463d77f 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -18,7 +18,7 @@ 
data_splits_dir=data/download/data_splits images_scp_dir=data/local overwrite=false subset=true -augment=true +augment=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -47,7 +47,7 @@ if [ $stage -le 0 ]; then --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ --data data/local/$set --subset $subset --augment $augment || exit 1 done - + echo "$0: Preparing data..." for set in dev train; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ From 87c9241186ba01dde184e5ac5414fcfd0f0aeade Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 02:24:19 -0400 Subject: [PATCH 40/67] adding augmentation in text localization --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 473f463d77f..8ced7d37af9 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -56,15 +56,15 @@ if [ $stage -le 0 ]; then image/fix_data_dir.sh data/${set} done - local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test - image/fix_data_dir.sh data/test + #local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + #image/fix_data_dir.sh data/test fi if [ $stage -le 1 ]; then echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in dev test train; do + for set in dev train; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; @@ -87,7 +87,7 @@ if [ $stage -le 3 ]; then utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in test train dev; do + for set in test train dev train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ @@ -118,7 +118,7 @@ fi nj=30 if [ $stage -le 4 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." 
- local/tl/chain/run_e2e_cnn.sh --nj $nj + local/tl/chain/run_e2e_cnn.sh --nj $nj --train_set train_aug fi if [ $stage -le 5 ]; then @@ -126,10 +126,10 @@ if [ $stage -le 5 ]; then steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train + data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi if [ $stage -le 6 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/tl/chain/run_cnn_e2eali.sh --nj $nj + local/tl/chain/run_cnn_e2eali.sh --nj $nj --train_set train_aug fi From 0e74e5562edc0403a203a45a35432d7659969238 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 11:30:29 -0400 Subject: [PATCH 41/67] fixing bugs --- egs/madcat_ar/v1/local/extract_features.sh | 2 +- egs/madcat_ar/v1/local/{tl => }/make_features.py | 0 egs/madcat_ar/v1/run_end2end.sh | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename egs/madcat_ar/v1/local/{tl => }/make_features.py (100%) diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 4ed6ba04348..1741ad3f9b2 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -36,7 +36,7 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - image/ocr/make_features.py $logdir/images.JOB.scp \ + local/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ diff --git a/egs/madcat_ar/v1/local/tl/make_features.py b/egs/madcat_ar/v1/local/make_features.py similarity index 100% rename from egs/madcat_ar/v1/local/tl/make_features.py rename to egs/madcat_ar/v1/local/make_features.py diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index a5496a503be..bee203d1483 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -74,7 +74,7 @@ fi if [ $stage -le 2 ]; then echo "$0: Preparing BPE..." 
- cut -d' ' -f2- data/train/text | utilis/lang/bpe/reverse.py | \ + cut -d' ' -f2- data/train/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt From 60915aa65c306673270e5c4b91935162046bb934 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 11:32:11 -0400 Subject: [PATCH 42/67] fixing bugs --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 8ced7d37af9..d59ad006886 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -76,7 +76,7 @@ fi if [ $stage -le 2 ]; then for set in train; do echo "$(date) stage 2: Performing augmentation, it will double training data" - local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + local/tl/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done fi From 4099d4ad08d509ba676a437fbb5f9197e5c020ed Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 11:37:25 -0400 Subject: [PATCH 43/67] fixing bugs --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index d59ad006886..00f8f176f4f 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -83,11 +83,11 @@ fi if [ $stage -le 3 ]; then echo "$0: Preparing BPE..." - cut -d' ' -f2- data/train/text | utilis/lang/bpe/reverse.py | \ + cut -d' ' -f2- data/train/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in test train dev train_aug; do + for set in train dev train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ From 4f98f69f7ca922a0ee0f3d8193a8d943829b4e31 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 13:46:43 -0400 Subject: [PATCH 44/67] fixing bugs --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 00f8f176f4f..d263c34a838 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -56,15 +56,15 @@ if [ $stage -le 0 ]; then image/fix_data_dir.sh data/${set} done - #local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test - #image/fix_data_dir.sh data/test + local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + image/fix_data_dir.sh data/test fi if [ $stage -le 1 ]; then echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in dev train; do + for set in test dev train; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. 
$(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; @@ -87,7 +87,7 @@ if [ $stage -le 3 ]; then utils/lang/bpe/prepend_words.py | \ utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt - for set in train dev train_aug; do + for set in test train dev train_aug; do cut -d' ' -f1 data/$set/text > data/$set/ids cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ utils/lang/bpe/prepend_words.py | \ From 717501f9c83c44b3e328c64a04d76c9991822773 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 22:58:32 -0400 Subject: [PATCH 45/67] fixing bugs --- egs/madcat_ar/v1/local/extract_features.sh | 3 +-- egs/madcat_ar/v1/local/tl/augment_data.sh | 2 +- egs/madcat_ar/v1/local/tl/process_waldo_data.py | 5 +---- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 5 +++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 1741ad3f9b2..7df6385d9c9 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -10,7 +10,6 @@ nj=4 cmd=run.pl feat_dim=40 augment=false -fliplr=false echo "$0 $@" . ./cmd.sh @@ -38,7 +37,7 @@ utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ local/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ + --feat-dim $feat_dim --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh index 31e4a8217ca..e49112c9987 100755 --- a/egs/madcat_ar/v1/local/tl/augment_data.sh +++ b/egs/madcat_ar/v1/local/tl/augment_data.sh @@ -26,7 +26,7 @@ for set in $aug_set; do $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ - --fliplr false --augment true $datadir/augmentations/$set + --augment true $datadir/augmentations/$set done echo " combine original data and data from different augmentations" diff --git a/egs/madcat_ar/v1/local/tl/process_waldo_data.py b/egs/madcat_ar/v1/local/tl/process_waldo_data.py index df8b6c5149f..b7a24807c5a 100755 --- a/egs/madcat_ar/v1/local/tl/process_waldo_data.py +++ b/egs/madcat_ar/v1/local/tl/process_waldo_data.py @@ -29,8 +29,6 @@ def read_image_text(image_text_path): image_path = line_vect[0] line_id = os.path.basename(image_path).split('.png')[0] transcription = line_vect[1:] - #transcription = " ".join(transcription) - #image_transcription_dict[line_id] = transcription joined_transcription = list() for word in transcription: joined_transcription.append(word) @@ -41,7 +39,6 @@ def read_image_text(image_text_path): ### main ### print("Processing '{}' data...".format(args.out_dir)) - text_file = os.path.join(args.out_dir, 'text') text_fh = open(text_file, 'w', encoding='utf-8') utt2spk_file = os.path.join(args.out_dir, 'utt2spk') @@ -50,7 +47,7 @@ def read_image_text(image_text_path): image_fh = open(image_file, 'w', encoding='utf-8') image_transcription_dict = read_image_text(args.image_transcription_file) -for line_id in image_transcription_dict: +for line_id in sorted(image_transcription_dict.keys()): writer_id = line_id.strip().split('_')[-3] 
updated_line_id = line_id + '.png' image_file_path = os.path.join('lines', updated_line_id) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index d263c34a838..9c7e5c7ab58 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -56,8 +56,9 @@ if [ $stage -le 0 ]; then image/fix_data_dir.sh data/${set} done - local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test - image/fix_data_dir.sh data/test + local/tl/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test + utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt + #image/fix_data_dir.sh data/test fi if [ $stage -le 1 ]; then From 56c77c4523723e5d79f43bfffb82015c0048f104 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 23:16:24 -0400 Subject: [PATCH 46/67] fixing bugs --- egs/madcat_ar/v1/local/make_features.py | 4 ++-- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 4 ++-- egs/madcat_ar/v1/run_end2end.sh | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py index e9d10ecc87e..1dbefe48f64 100755 --- a/egs/madcat_ar/v1/local/make_features.py +++ b/egs/madcat_ar/v1/local/make_features.py @@ -158,9 +158,9 @@ def vertical_shift(im, mode='mid'): num_fail += 1 continue if args.augment: - im_shift = vertical_shift(im_horizontal_padded, shift_setting[1]) + im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) else: - im_shift = vertical_shift(im_horizontal_padded, shift_setting[0]) + im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) data = np.transpose(im_shift, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 9c7e5c7ab58..e15aba27888 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -65,8 +65,8 @@ if [ $stage -le 1 ]; then echo "$0: Obtaining image groups. calling get_image2num_frames $(date)." image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in test dev train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" + for set in dev train test; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. $(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; done diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index bee203d1483..ccb177a6896 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -63,10 +63,10 @@ if [ $stage -le 1 ]; then image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for dataset in test train; do - echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $dataset. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset - steps/compute_cmvn_stats.sh data/$dataset || exit 1; + for set in test train; do + echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. 
$(date)" + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." utils/fix_data_dir.sh data/train From b9d26513435aa58bcb3791eb75548306145d6d46 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 23:26:18 -0400 Subject: [PATCH 47/67] fixing bugs --- .../v1/local/tl/run_textlocalization.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index e15aba27888..fd18a895232 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -107,22 +107,20 @@ if [ $stage -le 3 ]; then utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." - local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ - data/local/dict/lexicon.txt data/lang - utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/lang data/lang_rescore_6g + local/train_lm.sh --order 3 + utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + data/lang data/lang fi nj=30 -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then echo "$0: Calling the flat-start chain recipe... $(date)." local/tl/chain/run_e2e_cnn.sh --nj $nj --train_set train_aug fi -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model...$(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ --use-gpu false \ @@ -130,7 +128,7 @@ if [ $stage -le 5 ]; then data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" local/tl/chain/run_cnn_e2eali.sh --nj $nj --train_set train_aug fi From 74f7a82b5e986401bc0bc90d3ea36f1f3eac7781 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 20 Sep 2018 23:40:17 -0400 Subject: [PATCH 48/67] fixing bugs --- .../v1/local/tl/run_textlocalization.sh | 2 +- egs/madcat_ar/v1/local/tl/train_lm.sh | 102 ++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) create mode 100755 egs/madcat_ar/v1/local/tl/train_lm.sh diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index fd18a895232..fc18e52e58f 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -109,7 +109,7 @@ fi if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." - local/train_lm.sh --order 3 + local/tl/train_lm.sh --order 3 utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ data/lang data/lang fi diff --git a/egs/madcat_ar/v1/local/tl/train_lm.sh b/egs/madcat_ar/v1/local/tl/train_lm.sh new file mode 100755 index 00000000000..524bb2e9f40 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/train_lm.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# Copyright 2016 Vincent Nguyen +# 2016 Johns Hopkins University (author: Daniel Povey) +# 2017 Ashish Arora +# 2017 Hossein Hadian +# Apache 2.0 +# +# This script trains a LM on the training transcriptions. 
+# It is based on the example scripts distributed with PocoLM + +# It will check if pocolm is installed and if not will proceed with installation + +set -e +stage=0 +dir=data/local/local_lm +order=3 +echo "$0 $@" # Print the command line for logging +. ./utils/parse_options.sh || exit 1; + +lm_dir=${dir}/data + + +mkdir -p $dir +. ./path.sh || exit 1; # for KALDI_ROOT +export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH +( # First make sure the pocolm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d pocolm ]; then + echo Not installing the pocolm toolkit since it is already there. + else + echo "$0: Please install the PocoLM toolkit with: " + echo " cd ../../../tools; extras/install_pocolm.sh; cd -" + exit 1; + fi +) || exit 1; + +bypass_metaparam_optim_opt= +# If you want to bypass the metaparameter optimization steps with specific metaparameters +# un-comment the following line, and change the numbers to some appropriate values. +# You can find the values from the output log of train_lm.py. +# These example numbers of metaparameters are for a 4-gram model (with min-counts) +# running with train_lm.py. +# The dev perplexity should be close to the non-bypassed model. +# Note: to use these example parameters, you may need to remove the .done files +# to make sure make_lm_dir.py is called and trains only the 3-gram model +#for order in 3; do +#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done +if [ $stage -le 0 ]; then + mkdir -p ${dir}/data + mkdir -p ${dir}/data/text + + echo "$0: Getting the Data sources" + + rm ${dir}/data/text/* 2>/dev/null || true + + # use the validation data as the dev set. + # Note: the name 'dev' is treated specially by pocolm, it automatically + # becomes the dev set. + + cat data/dev/text | cut -d " " -f 2- > ${dir}/data/text/dev.txt + + # use the training data as an additional data source. + # we can later fold the dev data into this. + cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + + # for reporting perplexities, we'll use the "real" dev set. + # (the validation data is used as ${dir}/data/text/dev.txt to work + # out interpolation weights.) + # note, we can't put it in ${dir}/data/text/, because then pocolm would use + # it as one of the data sources. + cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt + + # get the wordlist from MADCAT text + cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist +fi + +if [ $stage -le 1 ]; then + # decide on the vocabulary. + # Note: you'd use --wordlist if you had a previously determined word-list + # that you wanted to use.
+ # Note: if you have more than one order, use a certain amount of words as the + # vocab and want to restrict max memory for 'sort', + echo "$0: training the unpruned LM" + min_counts='train=1' + wordlist=${dir}/data/wordlist + + lm_name="`basename ${wordlist}`_${order}" + if [ -n "${min_counts}" ]; then + lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`" + fi + unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm + train_lm.py --wordlist=${wordlist} --num-splits=20 --warm-start-ratio=20 \ + --limit-unk-history=true \ + ${bypass_metaparam_optim_opt} \ + ${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir} + + get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' + mkdir -p ${dir}/data/arpa + format_arpa_lm.py ${unpruned_lm_dir} | gzip -c > ${dir}/data/arpa/${order}gram_unpruned.arpa.gz +fi From 7597638d7e8af01854de8711204e0229efdf7db4 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 00:00:27 -0400 Subject: [PATCH 49/67] fixing bugs --- egs/madcat_ar/v1/local/chain/compare_wer.sh | 14 ++++++++++++++ egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh | 4 ---- egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh | 4 ---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/compare_wer.sh b/egs/madcat_ar/v1/local/chain/compare_wer.sh index ad90710b13f..7f04061dafb 100755 --- a/egs/madcat_ar/v1/local/chain/compare_wer.sh +++ b/egs/madcat_ar/v1/local/chain/compare_wer.sh @@ -27,6 +27,13 @@ for x in $*; do done echo +echo -n "# WER (rescored) " +for x in $*; do + wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + echo -n "# CER " for x in $*; do cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') @@ -34,6 +41,13 @@ for x in $*; do done echo +echo -n "# CER (rescored) " +for x in $*; do + cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. fi diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index 3e03473faef..cef0f927dd6 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -42,7 +42,6 @@ tdnn_dim=450 srand=0 remove_egs=true lang_decode=data/lang -lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging @@ -241,9 +240,6 @@ if [ $stage -le 7 ]; then --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh index d43c1f1a0f3..f93ff164b65 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh @@ -38,7 +38,6 @@ frames_per_iter=1000000 cmvn_opts="--norm-means=false --norm-vars=false" train_set=train lang_decode=data/lang -lang_rescore=data/lang_rescore_6g # End configuration section. 
echo "$0 $@" # Print the command line for logging @@ -163,9 +162,6 @@ if [ $stage -le 5 ]; then steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 fi echo "Done. Date: $(date). Results:" From 479590a61ef73d0bc0c0c93e0a87f59df75fb439 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 01:12:03 -0400 Subject: [PATCH 50/67] fixing run.sh --- egs/madcat_ar/v1/run.sh | 69 ++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index 7922bf30ed6..d3937582662 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -32,7 +32,6 @@ mkdir -p data/{train,test,dev}/data mkdir -p data/local/{train,test,dev} if [ $stage -le 0 ]; then - if [ -f data/train/text ] && ! $overwrite; then echo "$0: Not processing, probably script have run from wrong stage" echo "Exiting with status 1 to avoid data corruption" @@ -42,33 +41,27 @@ if [ $stage -le 0 ]; then echo "$0: Downloading data splits...$(date)" local/download_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 -fi -if [ $stage -le 1 ]; then - for dataset in test train dev; do - data_split_file=$data_splits_dir/madcat.$dataset.raw.lineid + for set in test train dev; do + data_split_file=$data_splits_dir/madcat.$set.raw.lineid local/extract_lines.sh --nj $nj --cmd $cmd --data_split_file $data_split_file \ --download_dir1 $download_dir1 --download_dir2 $download_dir2 \ --download_dir3 $download_dir3 --writing_condition1 $writing_condition1 \ --writing_condition2 $writing_condition2 --writing_condition3 $writing_condition3 \ - --data data/local/$dataset + --data data/local/$set --subset $subset --augment $augment || exit 1 done -fi -if [ $stage -le 2 ]; then echo "$0: Preparing data..." for set in dev train test; do local/process_data.py $download_dir1 $download_dir2 $download_dir3 \ $data_splits_dir/madcat.$set.raw.lineid data/$set $images_scp_dir/$set/images.scp \ - $writing_condition1 $writing_condition2 $writing_condition3 || exit 1 - data/local/splits/${set}.txt data/${set} + $writing_condition1 $writing_condition2 $writing_condition3 --augment $augment --subset $subset image/fix_data_dir.sh data/${set} done fi -mkdir -p data/{train,test,dev}/data -if [ $stage -le 3 ]; then +if [ $stage -le 1 ]; then for dataset in test train; do local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset steps/compute_cmvn_stats.sh data/$dataset || exit 1; @@ -76,14 +69,32 @@ if [ $stage -le 3 ]; then utils/fix_data_dir.sh data/train fi -if [ $stage -le 4 ]; then - echo "$0: Preparing dictionary and lang..." +if [ $stage -le 2 ]; then + echo "$0: Preparing BPE..." 
+ cut -d' ' -f2- data/train/text | utils/lang/bpe/reverse.py | \ + utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt + + for set in test train dev; do + cut -d' ' -f1 data/$set/text > data/$set/ids + cut -d' ' -f2- data/$set/text | utils/lang/bpe/reverse.py | \ + utils/lang/bpe/prepend_words.py | \ + utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > data/$set/bpe_text + + mv data/$set/text data/$set/text.old + paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text + rm -f data/$set/bpe_text data/$set/ids + done + + echo "$0:Preparing dictionary and lang..." local/prepare_dict.sh - utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \ - data/local/dict "" data/lang/temp data/lang + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.0 --position-dependent-phones false \ + data/local/dict "" data/lang/temp data/lang + utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang fi -if [ $stage -le 5 ]; then +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ @@ -92,19 +103,19 @@ if [ $stage -le 5 ]; then data/lang data/lang_rescore_6g fi -if [ $stage -le 6 ]; then +if [ $stage -le 4 ]; then steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train \ data/lang exp/mono fi -if [ $stage -le 7 ] && $decode_gmm; then - utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph +if [ $stage -le 5 ] && $decode_gmm; then + utils/mkgraph.sh --mono data/lang exp/mono exp/mono/graph steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ exp/mono/decode_test fi -if [ $stage -le 8 ]; then +if [ $stage -le 6 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ exp/mono exp/mono_ali @@ -112,14 +123,14 @@ if [ $stage -le 8 ]; then exp/mono_ali exp/tri fi -if [ $stage -le 9 ] && $decode_gmm; then - utils/mkgraph.sh data/lang_test exp/tri exp/tri/graph +if [ $stage -le 7 ] && $decode_gmm; then + utils/mkgraph.sh data/lang exp/tri exp/tri/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ exp/tri/decode_test fi -if [ $stage -le 10 ]; then +if [ $stage -le 8 ]; then steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ exp/tri exp/tri_ali @@ -128,22 +139,22 @@ if [ $stage -le 10 ]; then data/train data/lang exp/tri_ali exp/tri3 fi -if [ $stage -le 11 ] && $decode_gmm; then - utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph +if [ $stage -le 9 ] && $decode_gmm; then + utils/mkgraph.sh data/lang exp/tri3 exp/tri3/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri3/graph \ data/test exp/tri3/decode_test fi -if [ $stage -le 12 ]; then +if [ $stage -le 10 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ data/train data/lang exp/tri3 exp/tri3_ali fi -if [ $stage -le 13 ]; then +if [ $stage -le 11 ]; then local/chain/run_cnn.sh fi -if [ $stage -le 14 ]; then +if [ $stage -le 12 ]; then local/chain/run_cnn_chainali.sh --stage 2 fi From 87ab218a35be29a2a2d8154d244a7c79dab4895a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 13:50:30 -0400 Subject: [PATCH 51/67] fixing bug in language modelling --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index fc18e52e58f..85f662373e9 100755 --- 
a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -58,7 +58,6 @@ if [ $stage -le 0 ]; then local/tl/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt - #image/fix_data_dir.sh data/test fi if [ $stage -le 1 ]; then @@ -110,8 +109,8 @@ fi if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/tl/train_lm.sh --order 3 - utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ - data/lang data/lang + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ + data/local/dict/lexicon.txt data/lang fi nj=30 From d9790005b4d07866d51698fbb88ad7688f5a3d84 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 15:13:31 -0400 Subject: [PATCH 52/67] correcting options --- egs/madcat_ar/v1/run_end2end.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index ccb177a6896..a6ebb3cb5fb 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -17,8 +17,8 @@ writing_condition3=/export/corpora/LDC/LDC2013T15/docs/writing_conditions.tab data_splits_dir=data/download/data_splits images_scp_dir=data/local overwrite=false -subset=true -augment=true +subset=false +augment=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh From ed3ab450cd7bec2300415af88cfccafd3e128efb Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 15:16:45 -0400 Subject: [PATCH 53/67] adding comments --- egs/madcat_ar/v1/local/wer_output_filter | 3 +++ 1 file changed, 3 insertions(+) diff --git a/egs/madcat_ar/v1/local/wer_output_filter b/egs/madcat_ar/v1/local/wer_output_filter index c0f03e7178a..d6d46f3f565 100755 --- a/egs/madcat_ar/v1/local/wer_output_filter +++ b/egs/madcat_ar/v1/local/wer_output_filter @@ -2,6 +2,9 @@ # Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 +# This script converts a BPE-encoded text to normal text and performs normalization. +# It is used in scoring. + use utf8; use open qw(:encoding(utf8)); From 22df693730b0fe6313a50925cad4c15bb9614870 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 15:27:44 -0400 Subject: [PATCH 54/67] fixing conflict --- .../local/create_line_image_from_page_image.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index c7525cea89c..778555c427e 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -188,24 +188,6 @@ def rectangle_corners(rectangle): return rotate_points(rectangle['rectangle_center'], rectangle['unit_vector_angle'], corner_points) -<<<<<<< HEAD -def get_orientation(origin, p1, p2): - """ - Given origin and two points, return the orientation of the Point p1 with - regards to Point p2 using origin. - Returns - ------- - integer: Negative if p1 is clockwise of p2. 
- """ - difference = ( - ((p2[0] - origin[0]) * (p1[1] - origin[1])) - - ((p1[0] - origin[0]) * (p2[1] - origin[1])) - ) - return difference - - -======= ->>>>>>> ed3ab450cd7bec2300415af88cfccafd3e128efb def minimum_bounding_box(points): """ Given a list of 2D points, it returns the minimum area rectangle bounding all the points in the point cloud. From 95b1c3a73b9a4e8fa887029a1960eb9536beba30 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 16:30:19 -0400 Subject: [PATCH 55/67] updating chain parameters --- .../v1/local/tl/chain/run_cnn_e2eali.sh | 23 ++++--------------- .../v1/local/tl/chain/run_e2e_cnn.sh | 15 +++++------- 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index cef0f927dd6..7dac49d32f4 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -23,22 +23,17 @@ stage=0 nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. common_egs_dir= reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options srand=0 remove_egs=true lang_decode=data/lang @@ -120,7 +115,7 @@ if [ $stage -le 3 ]; then fi steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ + --frame-subsampling-factor 4 \ --alignment-subsampling-factor 1 \ --context-opts "--context-width=2 --central-position=1" \ --cmd "$cmd" $num_leaves ${train_data_dir} \ @@ -185,15 +180,15 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=4 \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=2 \ - --trainer.frames-per-iter=1000000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ @@ -202,10 +197,6 @@ if [ $stage -le 5 ]; then --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --cleanup.remove-egs=$remove_egs \ @@ -233,10 +224,6 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk 
$frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh index f93ff164b65..525207423a3 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh @@ -28,12 +28,8 @@ affix=1a # training options tdnn_dim=450 -num_epochs=2 -num_jobs_initial=3 -num_jobs_final=16 minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4 common_egs_dir= -l2_regularize=0.00005 frames_per_iter=1000000 cmvn_opts="--norm-means=false --norm-vars=false" train_set=train @@ -120,20 +116,21 @@ if [ $stage -le 3 ]; then --cmd "$cmd" \ --feat.cmvn-opts "$cmvn_opts" \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ + --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \ --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs 2 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ From 0b71dae5778dd0a74b90511f66acaf9a8ebd95a1 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 16:37:35 -0400 Subject: [PATCH 56/67] updating chain parameters --- .../v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 14 +------------- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 17 ++++++----------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index b0b77be2a18..4fe730d2728 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -32,12 +32,8 @@ reporting_email= train_stage=-10 xent_regularize=0.1 frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. 
-chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -186,7 +182,7 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --chain.frame-subsampling-factor=$frame_subsampling_factor \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ @@ -203,10 +199,6 @@ if [ $stage -le 5 ]; then --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ --cleanup.remove-egs=$remove_egs \ @@ -234,10 +226,6 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh index bf215a0cae2..2891e50da9e 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -27,14 +27,9 @@ affix=1a # training options tdnn_dim=450 -num_epochs=2 -num_jobs_initial=6 -num_jobs_final=16 minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=2000000 -cmvn_opts="--norm-means=true --norm-vars=true" +cmvn_opts="--norm-means=false --norm-vars=false" train_set=train lang_decode=data/lang lang_rescore=data/lang_rescore_6g @@ -119,7 +114,7 @@ if [ $stage -le 3 ]; then --cmd "$cmd" \ --feat.cmvn-opts "$cmvn_opts" \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ + --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ @@ -129,11 +124,11 @@ if [ $stage -le 3 ]; then --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --trainer.add-option="--optimization.memory-compression-level=2" \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs 2 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.num-jobs-initial 6 \ + --trainer.optimization.num-jobs-final 16 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ From e380a205535ff3316f3f446a054a9cea786ee2d6 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 16:50:31 -0400 Subject: [PATCH 57/67] updating parameters --- .../local/chain/tuning/run_cnn_e2eali_1a.sh | 20 ++++--------------- .../local/chain/tuning/run_cnn_e2eali_1b.sh | 4 ++-- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git 
a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 38de5fe3b7c..ee84ea0d83f 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -19,12 +19,8 @@ reporting_email= train_stage=-10 xent_regularize=0.1 frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -172,28 +168,24 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ --chain.frame-subsampling-factor=$frame_subsampling_factor \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=2 \ - --trainer.frames-per-iter=1000000 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=96,64 \ + --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --trainer.add-option="--optimization.memory-compression-level=2" \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ --cleanup.remove-egs=$remove_egs \ @@ -221,10 +213,6 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 4fe730d2728..c6052b76e7f 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -103,7 +103,6 @@ if [ $stage -le 2 ]; then --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts - fi if [ $stage -le 3 ]; then @@ -198,6 +197,7 @@ if [ $stage -le 5 ]; then --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ + --trainer.add-option="--optimization.memory-compression-level=2" \ --egs.chunk-width=$chunk_width \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ @@ -219,7 +219,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi From 639289df6d32c816c2b2bc8f641b60a4ebf3757a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 16:53:46 -0400 Subject: [PATCH 58/67] updating parameters --- egs/madcat_ar/v1/local/tl/run_textlocalization.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 85f662373e9..1a0aaf738d2 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -18,7 +18,7 @@ data_splits_dir=data/download/data_splits images_scp_dir=data/local overwrite=false subset=true -augment=false +augment=true . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh From 04e023625bda485de9b4e9fd48e0b0afc40964ec Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 17:04:38 -0400 Subject: [PATCH 59/67] updating parameters --- .../v1/local/chain/tuning/run_cnn_1a.sh | 19 ++++--------------- .../local/chain/tuning/run_cnn_chainali_1a.sh | 18 +++--------------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh index 02d095b3a82..eb140e900e1 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh @@ -21,13 +21,10 @@ reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 # we don't need extra left/right context for TDNN systems. 
-chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -169,13 +166,13 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=$frame_subsampling_factor \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=4 \ + --chain.alignment-subsampling-factor=4 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ @@ -184,10 +181,6 @@ if [ $stage -le 5 ]; then --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ --cleanup.remove-egs=$remove_egs \ @@ -215,10 +208,6 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh index 5faf6a73691..5b3597a3915 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -18,13 +18,9 @@ lats_affix= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. 
-chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 # training options srand=0 @@ -171,13 +167,13 @@ if [ $stage -le 5 ]; then --chain.leaky-hmm-coefficient=0.1 \ --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=4 \ --chain.alignment-subsampling-factor=1 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ @@ -186,10 +182,6 @@ if [ $stage -le 5 ]; then --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ --cleanup.remove-egs=$remove_egs \ @@ -217,10 +209,6 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; From 9c33a35da9c0554b0bcbced987819ba4c3f2828e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Fri, 21 Sep 2018 20:50:30 -0400 Subject: [PATCH 60/67] fixing bug in make features --- egs/madcat_ar/v1/local/extract_features.sh | 3 +- egs/madcat_ar/v1/local/make_features.py | 47 +---- egs/madcat_ar/v1/local/tl/make_features.py | 170 ++++++++++++++++++ .../v1/local/tl/run_textlocalization.sh | 2 +- 4 files changed, 180 insertions(+), 42 deletions(-) create mode 100755 egs/madcat_ar/v1/local/tl/make_features.py diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 7df6385d9c9..91b38a0407e 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -10,6 +10,7 @@ nj=4 cmd=run.pl feat_dim=40 augment=false +script_path=local echo "$0 $@" . ./cmd.sh @@ -35,7 +36,7 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - local/make_features.py $logdir/images.JOB.scp \ + $script_path/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ --feat-dim $feat_dim --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py index 1dbefe48f64..21ae44be1da 100755 --- a/egs/madcat_ar/v1/local/make_features.py +++ b/egs/madcat_ar/v1/local/make_features.py @@ -14,16 +14,14 @@ to enforce the images to have the specified length in that file by padding white pixels (the --padding option will be ignored in this case). This relates to end2end chain training. - eg. 
local/make_features.py data/train --feat-dim 40 """ -import random + import argparse import os import sys import numpy as np from scipy import misc -import math parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") @@ -39,10 +37,8 @@ parser.add_argument('--padding', type=int, default=5, help='Number of white pixels to pad on the left' 'and right side of the image.') -parser.add_argument('--vertical-shift', type=int, default=16, - help='total number of padding pixel per column') -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") + + args = parser.parse_args() @@ -97,39 +93,15 @@ def horizontal_pad(im, allowed_lengths = None): dtype=int)), axis=1) return im_pad1 -def vertical_shift(im, mode='mid'): - total = args.vertical_shift - if mode == 'notmid': - val = random.randint(0, 1) - if val == 0: - mode = 'top' - else: - mode = 'bottom' - if mode == 'mid': - top = int(total / 2) - bottom = total - top - elif mode == 'top': # more padding on top - top = random.randint(total / 2, total) - bottom = total - top - elif mode == 'bottom': # more padding on bottom - top = random.randint(0, total / 2) - bottom = total - top - width = im.shape[1] - im_pad = np.concatenate( - (255 * np.ones((top, width), dtype=int) - - np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) - im_pad = np.concatenate( - (im_pad, 255 * np.ones((bottom, width), dtype=int) - - np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) - return im_pad ### main ### -random.seed(1) + data_list_path = args.images_scp_path + if args.out_ark == '-': out_fh = sys.stdout else: - out_fh = open(args.out_ark,'w') + out_fh = open(args.out_ark,'wb') allowed_lengths = None allowed_len_handle = args.allowed_len_file_path @@ -144,7 +116,6 @@ def vertical_shift(im, mode='mid'): num_fail = 0 num_ok = 0 -aug_setting = ['mid', 'notmid'] with open(data_list_path) as f: for line in f: line = line.strip() @@ -157,11 +128,7 @@ def vertical_shift(im, mode='mid'): if im_horizontal_padded is None: num_fail += 1 continue - if args.augment: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) - else: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) - data = np.transpose(im_shift, (1, 0)) + data = np.transpose(im_horizontal_padded, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) diff --git a/egs/madcat_ar/v1/local/tl/make_features.py b/egs/madcat_ar/v1/local/tl/make_features.py new file mode 100755 index 00000000000..1dbefe48f64 --- /dev/null +++ b/egs/madcat_ar/v1/local/tl/make_features.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2018 Hossein Hadian + +""" This script converts images to Kaldi-format feature matrices. The input to + this script is the path to a data directory, e.g. "data/train". This script + reads the images listed in images.scp and writes them to standard output + (by default) as Kaldi-formatted matrices (in text form). It also scales the + images so they have the same height (via --feat-dim). It can optionally pad + the images (on left/right sides) with white pixels. + If an 'image2num_frames' file is found in the data dir, it will be used + to enforce the images to have the specified length in that file by padding + white pixels (the --padding option will be ignored in this case). 
This relates + to end2end chain training. + + eg. local/make_features.py data/train --feat-dim 40 +""" +import random +import argparse +import os +import sys +import numpy as np +from scipy import misc +import math + +parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and + writes them to standard output in text format.""") +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') +parser.add_argument('--out-ark', type=str, default='-', + help='Where to write the output feature file') +parser.add_argument('--feat-dim', type=int, default=40, + help='Size to scale the height of all images') +parser.add_argument('--padding', type=int, default=5, + help='Number of white pixels to pad on the left' + 'and right side of the image.') +parser.add_argument('--vertical-shift', type=int, default=16, + help='total number of padding pixel per column') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") +args = parser.parse_args() + + +def write_kaldi_matrix(file_handle, matrix, key): + file_handle.write(key + " [ ") + num_rows = len(matrix) + if num_rows == 0: + raise Exception("Matrix is empty") + num_cols = len(matrix[0]) + + for row_index in range(len(matrix)): + if num_cols != len(matrix[row_index]): + raise Exception("All the rows of a matrix are expected to " + "have the same length") + file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) + if row_index != num_rows - 1: + file_handle.write("\n") + file_handle.write(" ]\n") + + +def get_scaled_image(im): + scale_size = args.feat_dim + sx = im.shape[1] # width + sy = im.shape[0] # height + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + im = misc.imresize(im, (nx, ny)) + return im + + +def horizontal_pad(im, allowed_lengths = None): + if allowed_lengths is None: + left_padding = right_padding = args.padding + else: # Find an allowed length for the image + imlen = im.shape[1] # width + allowed_len = 0 + for l in allowed_lengths: + if l > imlen: + allowed_len = l + break + if allowed_len == 0: + # No allowed length was found for the image (the image is too long) + return None + padding = allowed_len - imlen + left_padding = int(padding // 2) + right_padding = padding - left_padding + dim_y = im.shape[0] # height + im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), + dtype=int), im), axis=1) + im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), + dtype=int)), axis=1) + return im_pad1 + +def vertical_shift(im, mode='mid'): + total = args.vertical_shift + if mode == 'notmid': + val = random.randint(0, 1) + if val == 0: + mode = 'top' + else: + mode = 'bottom' + if mode == 'mid': + top = int(total / 2) + bottom = total - top + elif mode == 'top': # more padding on top + top = random.randint(total / 2, total) + bottom = total - top + elif mode == 'bottom': # more padding on bottom + top = random.randint(0, total / 2) + bottom = total - top + width = im.shape[1] + im_pad = np.concatenate( + (255 * np.ones((top, width), dtype=int) - + np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) + im_pad = np.concatenate( + (im_pad, 255 * np.ones((bottom, width), dtype=int) - + np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) + return im_pad 
+ +### main ### +random.seed(1) +data_list_path = args.images_scp_path +if args.out_ark == '-': + out_fh = sys.stdout +else: + out_fh = open(args.out_ark,'w') + +allowed_lengths = None +allowed_len_handle = args.allowed_len_file_path +if os.path.isfile(allowed_len_handle): + print("Found 'allowed_lengths.txt' file...", file=sys.stderr) + allowed_lengths = [] + with open(allowed_len_handle) as f: + for line in f: + allowed_lengths.append(int(line.strip())) + print("Read {} allowed lengths and will apply them to the " + "features.".format(len(allowed_lengths)), file=sys.stderr) + +num_fail = 0 +num_ok = 0 +aug_setting = ['mid', 'notmid'] +with open(data_list_path) as f: + for line in f: + line = line.strip() + line_vect = line.split(' ') + image_id = line_vect[0] + image_path = line_vect[1] + im = misc.imread(image_path) + im_scaled = get_scaled_image(im) + im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) + if im_horizontal_padded is None: + num_fail += 1 + continue + if args.augment: + im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) + else: + im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) + data = np.transpose(im_shift, (1, 0)) + data = np.divide(data, 255.0) + num_ok += 1 + write_kaldi_matrix(out_fh, data, image_id) + +print('Generated features for {} images. Failed for {} (image too ' + 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 1a0aaf738d2..7e7aabeac48 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -66,7 +66,7 @@ if [ $stage -le 1 ]; then image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train for set in dev train test; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 --script_path local/tl data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." From d4516eab853227c3cd2a3c7bb18c88f3d7b9f6e6 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Sat, 22 Sep 2018 05:57:02 -0400 Subject: [PATCH 61/67] Revert "fixing bug in make features" This reverts commit 9c33a35da9c0554b0bcbced987819ba4c3f2828e. --- egs/madcat_ar/v1/local/extract_features.sh | 3 +- egs/madcat_ar/v1/local/make_features.py | 47 ++++- egs/madcat_ar/v1/local/tl/make_features.py | 170 ------------------ .../v1/local/tl/run_textlocalization.sh | 2 +- 4 files changed, 42 insertions(+), 180 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/tl/make_features.py diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 91b38a0407e..7df6385d9c9 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -10,7 +10,6 @@ nj=4 cmd=run.pl feat_dim=40 augment=false -script_path=local echo "$0 $@" . 
./cmd.sh @@ -36,7 +35,7 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - $script_path/make_features.py $logdir/images.JOB.scp \ + local/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ --feat-dim $feat_dim --augment $augment \| \ copy-feats --compress=true --compression-method=7 \ diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py index 21ae44be1da..1dbefe48f64 100755 --- a/egs/madcat_ar/v1/local/make_features.py +++ b/egs/madcat_ar/v1/local/make_features.py @@ -14,14 +14,16 @@ to enforce the images to have the specified length in that file by padding white pixels (the --padding option will be ignored in this case). This relates to end2end chain training. + eg. local/make_features.py data/train --feat-dim 40 """ - +import random import argparse import os import sys import numpy as np from scipy import misc +import math parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") @@ -37,8 +39,10 @@ parser.add_argument('--padding', type=int, default=5, help='Number of white pixels to pad on the left' 'and right side of the image.') - - +parser.add_argument('--vertical-shift', type=int, default=16, + help='total number of padding pixel per column') +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() @@ -93,15 +97,39 @@ def horizontal_pad(im, allowed_lengths = None): dtype=int)), axis=1) return im_pad1 +def vertical_shift(im, mode='mid'): + total = args.vertical_shift + if mode == 'notmid': + val = random.randint(0, 1) + if val == 0: + mode = 'top' + else: + mode = 'bottom' + if mode == 'mid': + top = int(total / 2) + bottom = total - top + elif mode == 'top': # more padding on top + top = random.randint(total / 2, total) + bottom = total - top + elif mode == 'bottom': # more padding on bottom + top = random.randint(0, total / 2) + bottom = total - top + width = im.shape[1] + im_pad = np.concatenate( + (255 * np.ones((top, width), dtype=int) - + np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) + im_pad = np.concatenate( + (im_pad, 255 * np.ones((bottom, width), dtype=int) - + np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) + return im_pad ### main ### - +random.seed(1) data_list_path = args.images_scp_path - if args.out_ark == '-': out_fh = sys.stdout else: - out_fh = open(args.out_ark,'wb') + out_fh = open(args.out_ark,'w') allowed_lengths = None allowed_len_handle = args.allowed_len_file_path @@ -116,6 +144,7 @@ def horizontal_pad(im, allowed_lengths = None): num_fail = 0 num_ok = 0 +aug_setting = ['mid', 'notmid'] with open(data_list_path) as f: for line in f: line = line.strip() @@ -128,7 +157,11 @@ def horizontal_pad(im, allowed_lengths = None): if im_horizontal_padded is None: num_fail += 1 continue - data = np.transpose(im_horizontal_padded, (1, 0)) + if args.augment: + im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) + else: + im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) + data = np.transpose(im_shift, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) diff --git a/egs/madcat_ar/v1/local/tl/make_features.py b/egs/madcat_ar/v1/local/tl/make_features.py deleted file mode 100755 index 1dbefe48f64..00000000000 --- 
a/egs/madcat_ar/v1/local/tl/make_features.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2018 Hossein Hadian - -""" This script converts images to Kaldi-format feature matrices. The input to - this script is the path to a data directory, e.g. "data/train". This script - reads the images listed in images.scp and writes them to standard output - (by default) as Kaldi-formatted matrices (in text form). It also scales the - images so they have the same height (via --feat-dim). It can optionally pad - the images (on left/right sides) with white pixels. - If an 'image2num_frames' file is found in the data dir, it will be used - to enforce the images to have the specified length in that file by padding - white pixels (the --padding option will be ignored in this case). This relates - to end2end chain training. - - eg. local/make_features.py data/train --feat-dim 40 -""" -import random -import argparse -import os -import sys -import numpy as np -from scipy import misc -import math - -parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and - writes them to standard output in text format.""") -parser.add_argument('images_scp_path', type=str, - help='Path of images.scp file') -parser.add_argument('--allowed_len_file_path', type=str, default=None, - help='If supplied, each images will be padded to reach the ' - 'target length (this overrides --padding).') -parser.add_argument('--out-ark', type=str, default='-', - help='Where to write the output feature file') -parser.add_argument('--feat-dim', type=int, default=40, - help='Size to scale the height of all images') -parser.add_argument('--padding', type=int, default=5, - help='Number of white pixels to pad on the left' - 'and right side of the image.') -parser.add_argument('--vertical-shift', type=int, default=16, - help='total number of padding pixel per column') -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") -args = parser.parse_args() - - -def write_kaldi_matrix(file_handle, matrix, key): - file_handle.write(key + " [ ") - num_rows = len(matrix) - if num_rows == 0: - raise Exception("Matrix is empty") - num_cols = len(matrix[0]) - - for row_index in range(len(matrix)): - if num_cols != len(matrix[row_index]): - raise Exception("All the rows of a matrix are expected to " - "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) - if row_index != num_rows - 1: - file_handle.write("\n") - file_handle.write(" ]\n") - - -def get_scaled_image(im): - scale_size = args.feat_dim - sx = im.shape[1] # width - sy = im.shape[0] # height - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) - return im - - -def horizontal_pad(im, allowed_lengths = None): - if allowed_lengths is None: - left_padding = right_padding = args.padding - else: # Find an allowed length for the image - imlen = im.shape[1] # width - allowed_len = 0 - for l in allowed_lengths: - if l > imlen: - allowed_len = l - break - if allowed_len == 0: - # No allowed length was found for the image (the image is too long) - return None - padding = allowed_len - imlen - left_padding = int(padding // 2) - right_padding = padding - left_padding - dim_y = im.shape[0] # height - im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), - dtype=int), im), axis=1) - im_pad1 = np.concatenate((im_pad, 255 * 
np.ones((dim_y, right_padding), - dtype=int)), axis=1) - return im_pad1 - -def vertical_shift(im, mode='mid'): - total = args.vertical_shift - if mode == 'notmid': - val = random.randint(0, 1) - if val == 0: - mode = 'top' - else: - mode = 'bottom' - if mode == 'mid': - top = int(total / 2) - bottom = total - top - elif mode == 'top': # more padding on top - top = random.randint(total / 2, total) - bottom = total - top - elif mode == 'bottom': # more padding on bottom - top = random.randint(0, total / 2) - bottom = total - top - width = im.shape[1] - im_pad = np.concatenate( - (255 * np.ones((top, width), dtype=int) - - np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) - im_pad = np.concatenate( - (im_pad, 255 * np.ones((bottom, width), dtype=int) - - np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) - return im_pad - -### main ### -random.seed(1) -data_list_path = args.images_scp_path -if args.out_ark == '-': - out_fh = sys.stdout -else: - out_fh = open(args.out_ark,'w') - -allowed_lengths = None -allowed_len_handle = args.allowed_len_file_path -if os.path.isfile(allowed_len_handle): - print("Found 'allowed_lengths.txt' file...", file=sys.stderr) - allowed_lengths = [] - with open(allowed_len_handle) as f: - for line in f: - allowed_lengths.append(int(line.strip())) - print("Read {} allowed lengths and will apply them to the " - "features.".format(len(allowed_lengths)), file=sys.stderr) - -num_fail = 0 -num_ok = 0 -aug_setting = ['mid', 'notmid'] -with open(data_list_path) as f: - for line in f: - line = line.strip() - line_vect = line.split(' ') - image_id = line_vect[0] - image_path = line_vect[1] - im = misc.imread(image_path) - im_scaled = get_scaled_image(im) - im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) - if im_horizontal_padded is None: - num_fail += 1 - continue - if args.augment: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) - else: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) - data = np.transpose(im_shift, (1, 0)) - data = np.divide(data, 255.0) - num_ok += 1 - write_kaldi_matrix(out_fh, data, image_id) - -print('Generated features for {} images. Failed for {} (image too ' - 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh index 7e7aabeac48..1a0aaf738d2 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_textlocalization.sh @@ -66,7 +66,7 @@ if [ $stage -le 1 ]; then image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train for set in dev train test; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 --script_path local/tl data/$set + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." 
From bac599a37d573b56a24e0b3724b1320ed7718425 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 9 Oct 2018 03:54:32 -0400 Subject: [PATCH 62/67] modification from review --- egs/madcat_ar/v1/local/extract_features.sh | 4 ++- egs/madcat_ar/v1/local/make_features.py | 2 ++ .../v1/local/tl/chain/run_cnn_e2eali.sh | 28 ++++++++----------- .../v1/local/tl/chain/run_e2e_cnn.sh | 14 +++++----- ...calization.sh => run_text_localization.sh} | 10 ++++++- egs/madcat_ar/v1/run_end2end.sh | 2 +- 6 files changed, 34 insertions(+), 26 deletions(-) rename egs/madcat_ar/v1/local/tl/{run_textlocalization.sh => run_text_localization.sh} (91%) diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 7df6385d9c9..06207482a18 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -10,6 +10,7 @@ nj=4 cmd=run.pl feat_dim=40 augment=false +verticle_shift=0 echo "$0 $@" . ./cmd.sh @@ -37,7 +38,8 @@ utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ local/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --augment $augment \| \ + --feat-dim $feat_dim --augment $augment \ + --vertical-shift $verticle_shift \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py index 1dbefe48f64..34d38dd9c82 100755 --- a/egs/madcat_ar/v1/local/make_features.py +++ b/egs/madcat_ar/v1/local/make_features.py @@ -98,6 +98,8 @@ def horizontal_pad(im, allowed_lengths = None): return im_pad1 def vertical_shift(im, mode='mid'): + if args.vertical_shift == 0: + return im total = args.vertical_shift if mode == 'notmid': val = random.randint(0, 1) diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index 7dac49d32f4..e0cca104f50 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -1,21 +1,17 @@ #!/bin/bash -# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the -# lattice alignments and to build a tree - -# local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b -# System cnn_e2eali_1b -# WER 10.78 -# CER 2.99 -# Final train prob -0.0587 -# Final valid prob -0.0609 -# Final train prob (xent) -0.4471 -# Final valid prob (xent) -0.4653 -# Parameters 3.37M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b -#exp/chain/cnn_e2eali_1b: num-iters=179 nj=8..16 num-params=3.4M dim=40->416 combine=-0.058->-0.058 (over 3) xent:train/valid[118,178,final]=(-0.463,-0.445,-0.447/-0.477,-0.462,-0.465) logprob:train/valid[118,178,final]=(-0.062,-0.059,-0.059/-0.063,-0.061,-0.061) - +# ./local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a/ +# System cnn_e2eali_1a +# WER 16.78 +# CER 5.22 +# Final train prob -0.1189 +# Final valid prob -0.1319 +# Final train prob (xent) -0.6395 +# Final valid prob (xent) -0.6732 +# Parameters 3.73M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a/ +# exp/chain/cnn_e2eali_1a/: num-iters=24 nj=3..15 num-params=3.7M dim=56->392 combine=-0.125->-0.125 (over 1) xent:train/valid[15,23,final]=(-0.850,-1.24,-0.640/-0.901,-1.31,-0.673) logprob:train/valid[15,23,final]=(-0.149,-0.209,-0.119/-0.166,-0.229,-0.132) set -e -o pipefail stage=0 diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh 
b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh index 525207423a3..3fca8cf5fdc 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh @@ -3,18 +3,18 @@ # This script does end2end chain training (i.e. from scratch) -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ # System e2e_cnn_1a -# WER 10.71 -# CER 2.85 -# Final train prob -0.0859 -# Final valid prob -0.1266 +# WER 19.30 +# CER 5.72 +# Final train prob -0.0734 +# Final valid prob -0.0607 # Final train prob (xent) # Final valid prob (xent) -# Parameters 2.94M +# Parameters 3.30M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=195 nj=6..16 num-params=2.9M dim=40->324 combine=-0.065->-0.064 (over 5) logprob:train/valid[129,194,final]=(-0.078,-0.077,-0.086/-0.129,-0.126,-0.127) +# exp/chain/e2e_cnn_1a/: num-iters=24 nj=3..15 num-params=3.3M dim=56->292 combine=-0.060->-0.060 (over 1) logprob:train/valid[15,23,final]=(-0.122,-0.143,-0.073/-0.105,-0.132,-0.061) set -e diff --git a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh similarity index 91% rename from egs/madcat_ar/v1/local/tl/run_textlocalization.sh rename to egs/madcat_ar/v1/local/tl/run_text_localization.sh index 1a0aaf738d2..5277dc58a30 100755 --- a/egs/madcat_ar/v1/local/tl/run_textlocalization.sh +++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh @@ -1,6 +1,12 @@ #!/bin/bash # Copyright 2017 Hossein Hadian # 2018 Ashish Arora +""" This script performs full page text recognition on automatically extracted line images + from madcat arabic data. It is created as a separate scrip, because it performs + data augmentation, uses smaller language model and calls process_waldo_data for + test images (automatically extracted line images). Data augmentation increases image + height hence requires different DNN arachitecture and different chain scripts. +""" set -e stage=0 nj=70 @@ -19,6 +25,7 @@ images_scp_dir=data/local overwrite=false subset=true augment=true +verticle_shift=16 . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -66,7 +73,8 @@ if [ $stage -le 1 ]; then image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train for set in dev train test; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. $(date)" - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 \ + --verticle_shift $verticle_shift data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; done echo "$0: Fixing data directory for train dataset $(date)." diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index a6ebb3cb5fb..de67e444f39 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -100,7 +100,7 @@ fi if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." 
local/train_lm.sh - utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_small.arpa.gz \ + utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ data/local/dict/lexicon.txt data/lang utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ data/lang data/lang_rescore_6g From f0a990e09d376cbd8b26599e4cbc772a60762759 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 9 Oct 2018 05:34:11 -0400 Subject: [PATCH 63/67] modification from review, adding new augmentation in make feature --- egs/cifar/v1/image/ocr/make_features.py | 75 ++++++-- egs/madcat_ar/v1/local/extract_features.sh | 6 +- egs/madcat_ar/v1/local/make_features.py | 172 ------------------ egs/madcat_ar/v1/local/tl/augment_data.sh | 2 +- .../v1/local/tl/run_text_localization.sh | 3 +- egs/yomdle_fa/v1/local/augment_data.sh | 2 +- egs/yomdle_fa/v1/local/extract_features.sh | 4 +- egs/yomdle_tamil/v1/local/augment_data.sh | 2 +- egs/yomdle_tamil/v1/local/extract_features.sh | 4 +- egs/yomdle_zh/v1/local/augment_data.sh | 2 +- egs/yomdle_zh/v1/local/extract_features.sh | 4 +- 11 files changed, 76 insertions(+), 200 deletions(-) delete mode 100755 egs/madcat_ar/v1/local/make_features.py diff --git a/egs/cifar/v1/image/ocr/make_features.py b/egs/cifar/v1/image/ocr/make_features.py index 07f3cb12257..54d2b92ab25 100755 --- a/egs/cifar/v1/image/ocr/make_features.py +++ b/egs/cifar/v1/image/ocr/make_features.py @@ -45,10 +45,13 @@ 'and right side of the image.') parser.add_argument('--num-channels', type=int, default=1, help='Number of color channels') +parser.add_argument('--vertical-shift', type=int, default=0, + help='total number of padding pixel per column') parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False, help="Flip the image left-right for right to left languages") -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") +parser.add_argument('--augment_type', type=str, default='no_aug', + choices=['no_aug', 'random_scale','random_shift'], + help='Subset of data to process.') args = parser.parse_args() @@ -112,6 +115,41 @@ def get_scaled_image_aug(im, mode='normal'): return im_scaled_up return im +def get_scaled_image(im): + scale_size = args.feat_dim + sx = im.shape[1] # width + sy = im.shape[0] # height + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + im = misc.imresize(im, (nx, ny)) + return im + +def vertical_shift(im, mode='mid'): if args.vertical_shift == 0: return im + total = args.vertical_shift + if mode == 'notmid': + val = random.randint(0, 1) + if val == 0: + mode = 'top' + else: + mode = 'bottom' + if mode == 'mid': + top = int(total / 2) + bottom = total - top + elif mode == 'top': # more padding on top + top = random.randint(total / 2, total) + bottom = total - top + elif mode == 'bottom': # more padding on bottom + top = random.randint(0, total / 2) + bottom = total - top + width = im.shape[1] + im_pad = np.concatenate( + (255 * np.ones((top, width), dtype=int) - + np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) + im_pad = np.concatenate( + (im_pad, 255 * np.ones((bottom, width), dtype=int) - + np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) + return im_pad ### main ### random.seed(1) @@ -134,7 +172,11 @@ def get_scaled_image_aug(im, mode='normal'): num_fail = 0 num_ok = 0 -aug_setting = ['normal', 'scaled'] +if args.augment_type == 'random_scale': + aug_setting = ['normal', 'scaled'] 
+elif args.augment_type == 'random_shift': + aug_setting = ['mid', 'notmid'] + with open(data_list_path) as f: for line in f: line = line.strip() @@ -144,21 +186,26 @@ def get_scaled_image_aug(im, mode='normal'): im = misc.imread(image_path) if args.fliplr: im = np.fliplr(im) - if args.augment: - im_aug = get_scaled_image_aug(im, aug_setting[1]) - else: - im_aug = get_scaled_image_aug(im, aug_setting[0]) - im_horizontal_padded = horizontal_pad(im_aug, allowed_lengths) - if im_horizontal_padded is None: + if args.augment_type == 'no_aug': + im = get_scaled_image_aug(im, aug_setting[0]) + im = vertical_shift(im, aug_setting[0]) + elif args.augment_type == 'random_scale': + im = get_scaled_image_aug(im, aug_setting[1]) + im = vertical_shift(im, aug_setting[0]) + elif args.augment_type == 'random_shift': + im = get_scaled_image_aug(im, aug_setting[0]) + im = vertical_shift(im, aug_setting[1]) + im = horizontal_pad(im, allowed_lengths) + if im is None: num_fail += 1 continue if args.num_channels == 1: - data = np.transpose(im_horizontal_padded, (1, 0)) + data = np.transpose(im, (1, 0)) elif args.num_channels == 3: - H = im_horizontal_padded.shape[0] - W = im_horizontal_padded.shape[1] - C = im_horizontal_padded.shape[2] - data = np.reshape(np.transpose(im_horizontal_padded, (1, 0, 2)), (W, H * C)) + H = im.shape[0] + W = im.shape[1] + C = im.shape[2] + data = np.reshape(np.transpose(im, (1, 0, 2)), (W, H * C)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) diff --git a/egs/madcat_ar/v1/local/extract_features.sh b/egs/madcat_ar/v1/local/extract_features.sh index 06207482a18..9fe588f31b8 100755 --- a/egs/madcat_ar/v1/local/extract_features.sh +++ b/egs/madcat_ar/v1/local/extract_features.sh @@ -9,7 +9,7 @@ nj=4 cmd=run.pl feat_dim=40 -augment=false +augment='no_aug' verticle_shift=0 echo "$0 $@" @@ -36,9 +36,9 @@ done utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - local/make_features.py $logdir/images.JOB.scp \ + image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --augment $augment \ + --feat-dim $feat_dim --augment_type $augment \ --vertical-shift $verticle_shift \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/madcat_ar/v1/local/make_features.py b/egs/madcat_ar/v1/local/make_features.py deleted file mode 100755 index 34d38dd9c82..00000000000 --- a/egs/madcat_ar/v1/local/make_features.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2018 Hossein Hadian - -""" This script converts images to Kaldi-format feature matrices. The input to - this script is the path to a data directory, e.g. "data/train". This script - reads the images listed in images.scp and writes them to standard output - (by default) as Kaldi-formatted matrices (in text form). It also scales the - images so they have the same height (via --feat-dim). It can optionally pad - the images (on left/right sides) with white pixels. - If an 'image2num_frames' file is found in the data dir, it will be used - to enforce the images to have the specified length in that file by padding - white pixels (the --padding option will be ignored in this case). This relates - to end2end chain training. - - eg. 
local/make_features.py data/train --feat-dim 40 -""" -import random -import argparse -import os -import sys -import numpy as np -from scipy import misc -import math - -parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and - writes them to standard output in text format.""") -parser.add_argument('images_scp_path', type=str, - help='Path of images.scp file') -parser.add_argument('--allowed_len_file_path', type=str, default=None, - help='If supplied, each images will be padded to reach the ' - 'target length (this overrides --padding).') -parser.add_argument('--out-ark', type=str, default='-', - help='Where to write the output feature file') -parser.add_argument('--feat-dim', type=int, default=40, - help='Size to scale the height of all images') -parser.add_argument('--padding', type=int, default=5, - help='Number of white pixels to pad on the left' - 'and right side of the image.') -parser.add_argument('--vertical-shift', type=int, default=16, - help='total number of padding pixel per column') -parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, - help="performs image augmentation") -args = parser.parse_args() - - -def write_kaldi_matrix(file_handle, matrix, key): - file_handle.write(key + " [ ") - num_rows = len(matrix) - if num_rows == 0: - raise Exception("Matrix is empty") - num_cols = len(matrix[0]) - - for row_index in range(len(matrix)): - if num_cols != len(matrix[row_index]): - raise Exception("All the rows of a matrix are expected to " - "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) - if row_index != num_rows - 1: - file_handle.write("\n") - file_handle.write(" ]\n") - - -def get_scaled_image(im): - scale_size = args.feat_dim - sx = im.shape[1] # width - sy = im.shape[0] # height - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) - return im - - -def horizontal_pad(im, allowed_lengths = None): - if allowed_lengths is None: - left_padding = right_padding = args.padding - else: # Find an allowed length for the image - imlen = im.shape[1] # width - allowed_len = 0 - for l in allowed_lengths: - if l > imlen: - allowed_len = l - break - if allowed_len == 0: - # No allowed length was found for the image (the image is too long) - return None - padding = allowed_len - imlen - left_padding = int(padding // 2) - right_padding = padding - left_padding - dim_y = im.shape[0] # height - im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), - dtype=int), im), axis=1) - im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), - dtype=int)), axis=1) - return im_pad1 - -def vertical_shift(im, mode='mid'): - if args.vertical_shift == 0: - return im - total = args.vertical_shift - if mode == 'notmid': - val = random.randint(0, 1) - if val == 0: - mode = 'top' - else: - mode = 'bottom' - if mode == 'mid': - top = int(total / 2) - bottom = total - top - elif mode == 'top': # more padding on top - top = random.randint(total / 2, total) - bottom = total - top - elif mode == 'bottom': # more padding on bottom - top = random.randint(0, total / 2) - bottom = total - top - width = im.shape[1] - im_pad = np.concatenate( - (255 * np.ones((top, width), dtype=int) - - np.random.normal(2, 1, (top, width)).astype(int), im), axis=0) - im_pad = np.concatenate( - (im_pad, 255 * np.ones((bottom, width), dtype=int) - - np.random.normal(2, 1, (bottom, width)).astype(int)), axis=0) - return im_pad - -### 
main ### -random.seed(1) -data_list_path = args.images_scp_path -if args.out_ark == '-': - out_fh = sys.stdout -else: - out_fh = open(args.out_ark,'w') - -allowed_lengths = None -allowed_len_handle = args.allowed_len_file_path -if os.path.isfile(allowed_len_handle): - print("Found 'allowed_lengths.txt' file...", file=sys.stderr) - allowed_lengths = [] - with open(allowed_len_handle) as f: - for line in f: - allowed_lengths.append(int(line.strip())) - print("Read {} allowed lengths and will apply them to the " - "features.".format(len(allowed_lengths)), file=sys.stderr) - -num_fail = 0 -num_ok = 0 -aug_setting = ['mid', 'notmid'] -with open(data_list_path) as f: - for line in f: - line = line.strip() - line_vect = line.split(' ') - image_id = line_vect[0] - image_path = line_vect[1] - im = misc.imread(image_path) - im_scaled = get_scaled_image(im) - im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) - if im_horizontal_padded is None: - num_fail += 1 - continue - if args.augment: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[1]) - else: - im_shift = vertical_shift(im_horizontal_padded, aug_setting[0]) - data = np.transpose(im_shift, (1, 0)) - data = np.divide(data, 255.0) - num_ok += 1 - write_kaldi_matrix(out_fh, data, image_id) - -print('Generated features for {} images. Failed for {} (image too ' - 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh index e49112c9987..8251021acb7 100755 --- a/egs/madcat_ar/v1/local/tl/augment_data.sh +++ b/egs/madcat_ar/v1/local/tl/augment_data.sh @@ -26,7 +26,7 @@ for set in $aug_set; do $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ - --augment true $datadir/augmentations/$set + --augment 'random_shift' $datadir/augmentations/$set done echo " combine original data and data from different augmentations" diff --git a/egs/madcat_ar/v1/local/tl/run_text_localization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh index 5277dc58a30..7263d45b062 100755 --- a/egs/madcat_ar/v1/local/tl/run_text_localization.sh +++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh @@ -84,7 +84,8 @@ fi if [ $stage -le 2 ]; then for set in train; do echo "$(date) stage 2: Performing augmentation, it will double training data" - local/tl/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + local/tl/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 \ + --verticle_shift $verticle_shift data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done fi diff --git a/egs/yomdle_fa/v1/local/augment_data.sh b/egs/yomdle_fa/v1/local/augment_data.sh index 34e938db069..20fb1f415d4 100755 --- a/egs/yomdle_fa/v1/local/augment_data.sh +++ b/egs/yomdle_fa/v1/local/augment_data.sh @@ -27,7 +27,7 @@ for set in aug1; do $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ - --fliplr $fliplr --augment true $datadir/augmentations/$set + --fliplr $fliplr --augment 'random_scale' $datadir/augmentations/$set done echo " combine original data and data from different augmentations" diff --git a/egs/yomdle_fa/v1/local/extract_features.sh b/egs/yomdle_fa/v1/local/extract_features.sh index 7d6806a2712..f75837ae5b3 100755 --- 
a/egs/yomdle_fa/v1/local/extract_features.sh +++ b/egs/yomdle_fa/v1/local/extract_features.sh @@ -6,7 +6,7 @@ nj=4 cmd=run.pl feat_dim=40 fliplr=false -augment=false +augment='no_aug' num_channels=3 echo "$0 $@" @@ -35,7 +35,7 @@ utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --num-channels $num_channels --fliplr $fliplr --augment $augment \| \ + --feat-dim $feat_dim --num-channels $num_channels --fliplr $fliplr --augment_type $augment \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/yomdle_tamil/v1/local/augment_data.sh b/egs/yomdle_tamil/v1/local/augment_data.sh index 82fa5230a43..da5213fba65 100755 --- a/egs/yomdle_tamil/v1/local/augment_data.sh +++ b/egs/yomdle_tamil/v1/local/augment_data.sh @@ -26,7 +26,7 @@ for set in aug1; do $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ - --fliplr false --augment true $datadir/augmentations/$set + --fliplr false --augment 'random_scale' $datadir/augmentations/$set done echo " combine original data and data from different augmentations" diff --git a/egs/yomdle_tamil/v1/local/extract_features.sh b/egs/yomdle_tamil/v1/local/extract_features.sh index 4ed6ba04348..3880ebad3e8 100755 --- a/egs/yomdle_tamil/v1/local/extract_features.sh +++ b/egs/yomdle_tamil/v1/local/extract_features.sh @@ -9,7 +9,7 @@ nj=4 cmd=run.pl feat_dim=40 -augment=false +augment='no_aug' fliplr=false echo "$0 $@" @@ -38,7 +38,7 @@ utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ + --feat-dim $feat_dim --fliplr $fliplr --augment_type $augment \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/yomdle_zh/v1/local/augment_data.sh b/egs/yomdle_zh/v1/local/augment_data.sh index 34e938db069..20fb1f415d4 100755 --- a/egs/yomdle_zh/v1/local/augment_data.sh +++ b/egs/yomdle_zh/v1/local/augment_data.sh @@ -27,7 +27,7 @@ for set in aug1; do $srcdir $datadir/augmentations/$set cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ - --fliplr $fliplr --augment true $datadir/augmentations/$set + --fliplr $fliplr --augment 'random_scale' $datadir/augmentations/$set done echo " combine original data and data from different augmentations" diff --git a/egs/yomdle_zh/v1/local/extract_features.sh b/egs/yomdle_zh/v1/local/extract_features.sh index 7d6806a2712..f75837ae5b3 100755 --- a/egs/yomdle_zh/v1/local/extract_features.sh +++ b/egs/yomdle_zh/v1/local/extract_features.sh @@ -6,7 +6,7 @@ nj=4 cmd=run.pl feat_dim=40 fliplr=false -augment=false +augment='no_aug' num_channels=3 echo "$0 $@" @@ -35,7 +35,7 @@ utils/split_scp.pl $scp $split_scps || exit 1; $cmd JOB=1:$nj $logdir/extract_features.JOB.log \ image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim --num-channels $num_channels --fliplr $fliplr --augment $augment \| \ + --feat-dim 
$feat_dim --num-channels $num_channels --fliplr $fliplr --augment_type $augment \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp From 09da981a3678d3e9bc1ed82acf5c40a00878d3f7 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 9 Oct 2018 05:50:36 -0400 Subject: [PATCH 64/67] minor fix --- egs/madcat_ar/v1/local/tl/run_text_localization.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/egs/madcat_ar/v1/local/tl/run_text_localization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh index 7263d45b062..8d12f7d802f 100755 --- a/egs/madcat_ar/v1/local/tl/run_text_localization.sh +++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh @@ -1,12 +1,13 @@ #!/bin/bash # Copyright 2017 Hossein Hadian # 2018 Ashish Arora -""" This script performs full page text recognition on automatically extracted line images - from madcat arabic data. It is created as a separate scrip, because it performs - data augmentation, uses smaller language model and calls process_waldo_data for - test images (automatically extracted line images). Data augmentation increases image - height hence requires different DNN arachitecture and different chain scripts. -""" + +# This script performs full page text recognition on automatically extracted line images +# from madcat arabic data. It is created as a separate scrip, because it performs +# data augmentation, uses smaller language model and calls process_waldo_data for +# test images (automatically extracted line images). Data augmentation increases image +# height hence requires different DNN arachitecture and different chain scripts. + set -e stage=0 nj=70 From 3d9615e8109fac2b5101e33f25b55586538d7925 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 9 Oct 2018 15:12:18 -0400 Subject: [PATCH 65/67] fixing bugs --- egs/cifar/v1/image/ocr/make_features.py | 32 +++++++++-------------- egs/madcat_ar/v1/local/tl/augment_data.sh | 2 ++ egs/yomdle_fa/v1/local/augment_data.sh | 3 +++ egs/yomdle_tamil/v1/local/augment_data.sh | 2 ++ egs/yomdle_zh/v1/local/augment_data.sh | 2 ++ 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/egs/cifar/v1/image/ocr/make_features.py b/egs/cifar/v1/image/ocr/make_features.py index 54d2b92ab25..2c4d44a1990 100755 --- a/egs/cifar/v1/image/ocr/make_features.py +++ b/egs/cifar/v1/image/ocr/make_features.py @@ -71,7 +71,6 @@ def write_kaldi_matrix(file_handle, matrix, key): file_handle.write("\n") file_handle.write(" ]\n") - def horizontal_pad(im, allowed_lengths = None): if allowed_lengths is None: left_padding = right_padding = args.padding @@ -115,17 +114,9 @@ def get_scaled_image_aug(im, mode='normal'): return im_scaled_up return im -def get_scaled_image(im): - scale_size = args.feat_dim - sx = im.shape[1] # width - sy = im.shape[0] # height - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) - return im - -def vertical_shift(im, mode='mid'): if args.vertical_shift == 0: return im +def vertical_shift(im, mode='normal'): + if args.vertical_shift == 0: + return im total = args.vertical_shift if mode == 'notmid': val = random.randint(0, 1) @@ -133,7 +124,7 @@ def vertical_shift(im, mode='mid'): mode = 'top' else: mode = 'bottom' - if mode == 'mid': + if mode == 'normal': top = int(total / 2) bottom = total - top elif mode == 'top': # more padding on top @@ -175,7 +166,9 @@ def vertical_shift(im, mode='mid'): if args.augment_type == 'random_scale': aug_setting = ['normal', 
     aug_setting = ['normal', 'scaled']
 elif args.augment_type == 'random_shift':
-    aug_setting = ['mid', 'notmid']
+    aug_setting = ['normal', 'notmid']
+else:
+    aug_setting = ['normal']
 
 with open(data_list_path) as f:
     for line in f:
@@ -186,19 +179,18 @@ def vertical_shift(im, mode='mid'):
         im = misc.imread(image_path)
         if args.fliplr:
             im = np.fliplr(im)
-        if args.augment_type == 'no_aug':
+        if args.augment_type == 'no_aug' or 'random_shift':
             im = get_scaled_image_aug(im, aug_setting[0])
-            im = vertical_shift(im, aug_setting[0])
         elif args.augment_type == 'random_scale':
             im = get_scaled_image_aug(im, aug_setting[1])
-            im = vertical_shift(im, aug_setting[0])
-        elif args.augment_type == 'random_shift':
-            im = get_scaled_image_aug(im, aug_setting[0])
-            im = vertical_shift(im, aug_setting[1])
         im = horizontal_pad(im, allowed_lengths)
         if im is None:
             num_fail += 1
             continue
+        if args.augment_type == 'no_aug' or 'random_scale':
+            im = vertical_shift(im, aug_setting[0])
+        elif args.augment_type == 'random_shift':
+            im = vertical_shift(im, aug_setting[1])
         if args.num_channels == 1:
             data = np.transpose(im, (1, 0))
         elif args.num_channels == 3:
diff --git a/egs/madcat_ar/v1/local/tl/augment_data.sh b/egs/madcat_ar/v1/local/tl/augment_data.sh
index 8251021acb7..cc44aa58a62 100755
--- a/egs/madcat_ar/v1/local/tl/augment_data.sh
+++ b/egs/madcat_ar/v1/local/tl/augment_data.sh
@@ -8,6 +8,7 @@
 nj=4
 cmd=run.pl
 feat_dim=40
+verticle_shift=0
 
 echo "$0 $@"
 . ./cmd.sh
@@ -26,6 +27,7 @@ for set in $aug_set; do
     $srcdir $datadir/augmentations/$set
   cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
   local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
+    --vertical-shift $verticle_shift \
     --augment 'random_shift' $datadir/augmentations/$set
 done
diff --git a/egs/yomdle_fa/v1/local/augment_data.sh b/egs/yomdle_fa/v1/local/augment_data.sh
index 20fb1f415d4..1c38bcb072d 100755
--- a/egs/yomdle_fa/v1/local/augment_data.sh
+++ b/egs/yomdle_fa/v1/local/augment_data.sh
@@ -9,6 +9,7 @@ nj=4
 cmd=run.pl
 feat_dim=40
 fliplr=false
+verticle_shift=0
 
 echo "$0 $@"
 . ./cmd.sh
@@ -27,7 +28,9 @@ for set in aug1; do
     $srcdir $datadir/augmentations/$set
   cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
   local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
+    --vertical-shift $verticle_shift \
     --fliplr $fliplr --augment 'random_scale' $datadir/augmentations/$set
+
 done
 
 echo " combine original data and data from different augmentations"
diff --git a/egs/yomdle_tamil/v1/local/augment_data.sh b/egs/yomdle_tamil/v1/local/augment_data.sh
index da5213fba65..136bfd24eb2 100755
--- a/egs/yomdle_tamil/v1/local/augment_data.sh
+++ b/egs/yomdle_tamil/v1/local/augment_data.sh
@@ -8,6 +8,7 @@
 nj=4
 cmd=run.pl
 feat_dim=40
+verticle_shift=0
 
 echo "$0 $@"
 . ./cmd.sh
@@ -26,6 +27,7 @@ for set in aug1; do
     $srcdir $datadir/augmentations/$set
   cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
   local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
+    --vertical-shift $verticle_shift \
     --fliplr false --augment 'random_scale' $datadir/augmentations/$set
 done
diff --git a/egs/yomdle_zh/v1/local/augment_data.sh b/egs/yomdle_zh/v1/local/augment_data.sh
index 20fb1f415d4..1f13ed15ded 100755
--- a/egs/yomdle_zh/v1/local/augment_data.sh
+++ b/egs/yomdle_zh/v1/local/augment_data.sh
@@ -9,6 +9,7 @@ nj=4
 cmd=run.pl
 feat_dim=40
 fliplr=false
+verticle_shift=0
 
 echo "$0 $@"
 . ./cmd.sh
@@ -27,6 +28,7 @@ for set in aug1; do
     $srcdir $datadir/augmentations/$set
   cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
   local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
+    --vertical-shift $verticle_shift \
    --fliplr $fliplr --augment 'random_scale' $datadir/augmentations/$set
 done

From c33da9fa2d02015a0fcb351aa63555d50b679873 Mon Sep 17 00:00:00 2001
From: aarora8
Date: Tue, 9 Oct 2018 16:19:53 -0400
Subject: [PATCH 66/67] adding documentation

---
 egs/madcat_ar/v1/local/tl/process_waldo_data.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/egs/madcat_ar/v1/local/tl/process_waldo_data.py b/egs/madcat_ar/v1/local/tl/process_waldo_data.py
index b7a24807c5a..0d278e64122 100755
--- a/egs/madcat_ar/v1/local/tl/process_waldo_data.py
+++ b/egs/madcat_ar/v1/local/tl/process_waldo_data.py
@@ -1,5 +1,13 @@
 #!/usr/bin/env python3
+""" This script reads the image and transcription mapping and creates the following files: text, utt2spk, images.scp.
+    Eg. local/process_waldo_data.py lines/hyp_line_image_transcription_mapping_kaldi.txt data/test
+    Eg. text file: LDC0001_000404_NHR_ARB_20070113.0052_11_LDC0001_00z2 ﻮﺠﻫ ﻮﻌﻘﻟ ﻍﺍﺮﻗ ﺢﺗّﻯ ﺎﻠﻨﺧﺎﻋ
+        utt2spk file: LDC0001_000397_NHR_ARB_20070113.0052_11_LDC0001_00z1 LDC0001
+        images.scp file: LDC0009_000000_arb-NG-2-76513-5612324_2_LDC0009_00z0
+        data/local/lines/1/arb-NG-2-76513-5612324_2_LDC0009_00z0.tif
+"""
+
 
 import argparse
 import os
 import sys

From ee42879d06fa83ab87692fe89737bd2b2e3e82f7 Mon Sep 17 00:00:00 2001
From: aarora8
Date: Tue, 9 Oct 2018 16:52:41 -0400
Subject: [PATCH 67/67] modification from review

---
 egs/cifar/v1/image/ocr/make_features.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/egs/cifar/v1/image/ocr/make_features.py b/egs/cifar/v1/image/ocr/make_features.py
index 2c4d44a1990..a11cbcc7a82 100755
--- a/egs/cifar/v1/image/ocr/make_features.py
+++ b/egs/cifar/v1/image/ocr/make_features.py
@@ -163,13 +163,6 @@ def vertical_shift(im, mode='normal'):
 num_fail = 0
 num_ok = 0
 
-if args.augment_type == 'random_scale':
-    aug_setting = ['normal', 'scaled']
-elif args.augment_type == 'random_shift':
-    aug_setting = ['normal', 'notmid']
-else:
-    aug_setting = ['normal']
-
 with open(data_list_path) as f:
     for line in f:
         line = line.strip()
@@ -180,17 +173,17 @@ def vertical_shift(im, mode='normal'):
         if args.fliplr:
             im = np.fliplr(im)
         if args.augment_type == 'no_aug' or 'random_shift':
-            im = get_scaled_image_aug(im, aug_setting[0])
+            im = get_scaled_image_aug(im, 'normal')
         elif args.augment_type == 'random_scale':
-            im = get_scaled_image_aug(im, aug_setting[1])
+            im = get_scaled_image_aug(im, 'scaled')
         im = horizontal_pad(im, allowed_lengths)
         if im is None:
             num_fail += 1
             continue
         if args.augment_type == 'no_aug' or 'random_scale':
-            im = vertical_shift(im, aug_setting[0])
+            im = vertical_shift(im, 'normal')
         elif args.augment_type == 'random_shift':
-            im = vertical_shift(im, aug_setting[1])
+            im = vertical_shift(im, 'notmid')
         if args.num_channels == 1:
             data = np.transpose(im, (1, 0))
         elif args.num_channels == 3:
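
Note on the augmentation dispatch in make_features.py above: in Python, a condition written as
"args.augment_type == 'no_aug' or 'random_shift'" is always true, because the non-empty string
'random_shift' is truthy on its own, so the following elif branch can never run. The snippet below
is a minimal sketch, not part of the committed patches; it shows the selection the surrounding code
appears to intend, reusing the names (args, im, get_scaled_image_aug, vertical_shift,
horizontal_pad, allowed_lengths) defined in that file.

# Sketch only; assumes the variables and helpers defined in
# egs/cifar/v1/image/ocr/make_features.py above.
if args.augment_type in ('no_aug', 'random_shift'):
    im = get_scaled_image_aug(im, 'normal')    # scale normally, no scale augmentation
elif args.augment_type == 'random_scale':
    im = get_scaled_image_aug(im, 'scaled')    # apply random scale augmentation

im = horizontal_pad(im, allowed_lengths)
if im is not None:
    if args.augment_type in ('no_aug', 'random_scale'):
        im = vertical_shift(im, 'normal')      # split vertical padding evenly (top = total/2)
    elif args.augment_type == 'random_shift':
        im = vertical_shift(im, 'notmid')      # randomly pad more at the top or the bottom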