From 0ecfe9069d0936b4d703729c8ec2d827b48ca76f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 26 Oct 2016 23:04:31 -0400 Subject: [PATCH 01/12] Adding early draft of xconfig library --- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 483 +++++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py new file mode 100644 index 00000000000..437be386c90 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -0,0 +1,483 @@ +from __future__ import print_function +import subprocess +import logging +import math +import re +import sys +import traceback +import time +import argparse + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)s - %(funcName)s - %(levelname)s ] %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + + +class StrToBoolAction(argparse.Action): + """ A custom action to convert bools from shell format i.e., true/false + to python format i.e., True/False """ + def __call__(self, parser, namespace, values, option_string=None): + if values == "true": + setattr(namespace, self.dest, True) + elif values == "false": + setattr(namespace, self.dest, False) + else: + raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) + +class NullstrToNoneAction(argparse.Action): + """ A custom action to convert empty strings passed by shell + to None in python. This is necessary as shell scripts print null strings + when a variable is not specified. We could use the more apt None + in python. """ + def __call__(self, parser, namespace, values, option_string=None): + if values.strip() == "": + setattr(namespace, self.dest, None) + else: + setattr(namespace, self.dest, values) + + +# This class represents a line that starts with 'input', e.g. +# 'input name=ivector dim=100', or 'input name=input dim=40' +class XconfigInputLine: + # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. + def __init__(self, key_to_value): + if not 'name' in key_to_value: + raise Exception("Config line for input does not specify name.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Name '{0}' is not a valid node name.".format(self.name)) + if not 'dim' in key_to_value: + raise Exception("Config line for input does not specify dimension.") + try: + self.dim = int(key_to_value['dim']) + assert self.dim > 0 + except: + raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) + + # This returns the name of the layer. + def Name(): + return self.name + + # This returns the name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.relu'. + def OutputName(): + return self.name + + # note: layers have a function InputDim() also, so we call this dimension function + # OutputDim(). + def OutputDim(): + return self.dim + + +# A base-class for classes representing lines of xconfig files. +# This handles the +class XconfigLineBase: + def __init__(self): + pass + + def Name(): + return self.name + + def SetDims(): + raise Exception("SetDims() not implemented for this class") + + + + + +# This class parses and stores a Descriptor-- expression +# like Append(Offset(input, -3), input) and so on. 
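+# For example, 'Append(Offset(input, -3), input, Offset(input, 3))' splices the
+# 'input' node together at time offsets -3, 0 and 3, and
+# 'Sum(Offset(foo, 1), Offset(foo, 0))' sums two time-shifted copies of 'foo'.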
+# For the full range of possible expressions, see the comment at the +# top of src/nnet3/nnet-descriptor.h. +# Note: as an extension to the descriptor format used in the C++ +# code, we can have e.g. input@-3 meaning Offset(input, -3); +# and if bare integer numbers appear where a descriptor was expected, +# they are interpreted as Offset(prev_layer, -3) where 'prev_layer' +# is the previous layer in the config file. + +# Also, in any place a raw input/layer/output name can appear, we accept things +# like [-1] meaning the previous input/layer/output's name, or [-2] meaning the +# last-but-one input/layer/output, and so on. +class Descriptor: + def __init__(self, + descriptor_string = None, + prev_names = None): + # self.operator is a string that may be 'Offset', 'Append', + # 'Sum', 'Failover', 'IfDefined', 'Offset', 'Switch', 'Round', + # 'ReplaceIndex'; it also may be None, representing the base-case + # (where it's just a layer name) + + # self.items will be whatever items are + # inside the parentheses, e.g. if this is Sum(foo bar), + # then items will be [d1, d2], where d1 is a Descriptor for + # 'foo' and d1 is a Descriptor for 'bar'. However, there are + # cases where elements of self.items are strings or integers, + # for instance in an expression 'ReplaceIndex(ivector, x, 0)', + # self.items would be [d, 'x', 0], where d is a Descriptor + # for 'ivector'. In the case where self.operator is None (where + # this Descriptor represents just a bare layer name), self. + # items contains the name of the input layer as a string. + self.operator = None + self.items = None + + if descriptor_string != None: + try: + tokens = TokenizeDescriptor(descriptor_string) + pos = 0 + (d, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + # copy members from d. + self.operator = d.operator + self.items = d.items + except Exception as e: + traceback.print_tb(sys.exc_info()[2]) + raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format( + descriptor_string, repr(e))) + + + def str(self): + if self.operator is None: + assert len(self.items) == 1 and isinstance(self.items[0], str) + return self.items[0] + else: + assert isinstance(self.operator, str) + return self.operator + '(' + ', '.join([str(item) for item in self.items]) + ')' + + def __str__(self): + return self.str() + + +# This just checks that seen_item == expected_item, and raises an +# exception if not. +def ExpectToken(expected_item, seen_item, what_parsing): + if seen_item != expected_item: + raise Exception("parsing {0}, expected '{1}' but got '{2}'".format( + what_parsing, expected_item, seen_item)) + +# returns true if 'name' is valid as the name of a line (input, layer or output); +# this is the same as IsValidName() in the nnet3 code. +def IsValidLineName(name): + return isinstance(name, str) and re.match(r'^[a-zA-Z_][-a-zA-Z_0-9.]*', name) != None + +# This function for parsing Descriptors takes an array of tokens as produced +# by TokenizeDescriptor. It parses a descriptor +# starting from position pos >= 0 of the array 'tokens', and +# returns a new position in the array that reflects any tokens consumed while +# parsing the descriptor. +# It returns a pair (d, pos) where d is the newly parsed Descriptor, +# and 'pos' is the new position after consuming the relevant input. 
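+# A rough usage sketch (illustrative only):
+#   tokens = TokenizeDescriptor('Offset(input, -3)')
+#   (d, pos) = ParseNewDescriptor(tokens, 0, prev_names)
+#   # now d.str() == 'Offset(input, -3)' and tokens[pos] == 'end of string'.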
+def ParseNewDescriptor(tokens, pos, + prev_names): + size = len(tokens) + first_token = tokens[pos] + pos += 1 + d = Descriptor() + + # when reading this function, be careful to note the indent level, + # there is an if-statement within an if-statement. + if first_token in [ 'Offset', 'Round', 'ReplaceIndex', 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]: + ExpectToken('(', tokens[pos], first_token + '()') + pos += 1 + d.operator = first_token + # the 1st argument of all these operators is a Descriptor. + (desc, pos) = ParseNewDescriptor(tokens, + pos, prev_names) + d.items = [desc] + + if first_token == 'Offset': + ExpectToken(',', tokens[pos], 'Offset()') + pos += 1 + try: + t_offset = int(tokens[pos]) + pos += 1 + d.items.append(t_offset) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + if tokens[pos] == ')': + return (d, pos + 1) + elif tokens[pos] != ',': + raise Exception("Parsing Offset(), expected ')' or ',', got " + tokens[pos]) + pos += 1 + try: + x_offset = int(tokens[pos]) + pos += 1 + d.items.append(x_offset) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'Offset()') + pos += 1 + elif first_token in [ 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]: + while True: + if tokens[pos] == ')': + # check num-items is correct for some special cases. + if first_token == 'Failover' and len(d.items) != 2: + raise Exception("Parsing Failover(), expected 2 items but got {0}".format(len(d.items))) + if first_token == 'IfDefined' and len(d.items) != 1: + raise Exception("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items))) + pos += 1 + break + elif tokens[pos] == ',': + pos += 1 # consume the comma. + else: + raise Exception("Parsing Append(), expected ')' or ',', got " + tokens[pos]) + + (desc, pos) = ParseNewDescriptor(tokens, + pos, prev_names) + d.items.append(desc) + elif first_token == 'Round': + ExpectToken(',', tokens[pos], 'Round()') + pos += 1 + try: + t_modulus = int(tokens[pos]) + assert t_modulus > 0 + pos += 1 + d.items.append(t_modulus) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'Round()') + pos += 1 + elif first_token == 'ReplaceIndex': + ExpectToken(',', tokens[pos], 'ReplaceIndex()') + pos += 1 + if tokens[pos] in [ 'x', 't' ]: + d.items.append(tokens[pos]) + pos += 1 + else: + raise Exception("Parsing ReplaceIndex(), expected 'x' or 't', got " + + tokens[pos]) + ExpectToken(',', tokens[pos], 'ReplaceIndex()') + pos += 1 + try: + new_value = int(tokens[pos]) + pos += 1 + d.items.append(new_value) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'ReplaceIndex()') + pos += 1 + else: + raise Exception("code error") + elif first_token in [ 'end of string', '(', ')', ',', '@' ]: + raise Exception("Expected descriptor, got " + first_token) + elif IsValidLineName(first_token) or first_token == '[': + # This section parses either a raw input/layer/output name, e.g. "affine2" + # (which must start with an alphabetic character or underscore), + # or something like [-2], optionally followed by an offset like '@-3'. 
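+        # For example, '[-2]@3' ends up as Offset(<last-but-one layer>, 3), while
+        # a bare name like 'affine2' just becomes a base-case Descriptor.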
+ if first_token == '[': + try: + offset_into_prev_names = int(tokens[pos]) + assert offset_into_prev_names < 0 + pos += 1 + except: + raise Exception("Parse error: after '[', expected negative integer, got '{0}'".format( + tokens[pos])) + ExpectToken(']', tokens[pos], 'Descriptor') + pos += 1 + assert isinstance(prev_names, list) + if -offset_into_prev_names > len(prev_names): + raise Exception("Error: expression [{0}] requested, but there are " + "not enough previous input or layer names to satisfy " + "this.".format(offset_into_prev_names)) + d.operator = None + # below, e.g. prev_names[-2] would give the last-but-one layer. + d.items = [prev_names[offset_into_prev_names]] + else: + # 'first_token' starts with a-z, A-Z or _, treat it + # as the name of a layer or input node. + d.operator = None + d.items = [first_token] + + # If the layer-name or expression like [-2] is followed by '@', then + # we're parsing something like 'affine1@-3' or '[-2]@3'. + if tokens[pos] == '@': + pos += 1 + try: + offset_t = int(tokens[pos]) + pos += 1 + except: + raise Exception("Parse error parsing {0}@{1}".format( + first_token, tokens[pos])) + if offset_t != 0: + inner_d = d + d = Descriptor() + # e.g. foo@3 is equivalent to 'Offset(foo, 3)'. + d.operator = 'Offset' + d.items = [ inner_d, offset_t ] + else: + # the last possible case is that 'first_token' is just an integer i, + # which can appear in things like Append(-3, 0, 3). + # See if the token is an integer. + # In this case, it's interpreted as the name of previous layer + # (with that time offset applied). + try: + offset_t = int(first_token) + except: + raise Exception("Parsing descriptor, expected descriptor but got " + + first_token) + assert isinstance(prev_names, list) + if len(prev_names) < 1: + raise Exception("Parsing descriptor, could not interpret '{0}' because " + "there is no previous layer".format(first_token)) + d.operator = None + # the layer name is the name of the most recent layer. + d.items = [prev_names[-1]] + if offset_t != 0: + inner_d = d + d = Descriptor() + d.operator = 'Offset' + d.items = [ inner_d, offset_t ] + return (d, pos) + + + + +# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. +# Note: for convenience in parsing, we add the token 'end-of-string' to this +# list. +# The argument 'prev_names' (for the names of previous layers and input and +# output nodes) is needed to process expressions like [-1] meaning the most +# recent layer, or [-2] meaning the last layer but one. +def TokenizeDescriptor(descriptor_string, + prev_names = None): + # split on '(', ')', ',', '@', and space. + # Note: the parenthesis () in the regexp causes it to output + # the stuff inside the () as if it were a field, which is + # why we keep characters like '(' and ')' as tokens. + fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) + ans = [] + for f in fields: + # don't include fields that are space, or are empty. + if re.match(r'^\s*$', f) is None: + ans.append(f) + + ans.append('end of string') + return ans + + +# This function parses a line in a config file, something like +# affine-layer name=affine1 input=Append(-3, 0, 3) +# and returns a pair, +# (first_token, fields), as (string, dict) e.g. in this case +# ('affine-layer', {'name':'affine1', 'input':'Append(-3, 0, 3)" +# Note: spaces are allowed in the field names but = signs are +# disallowed, which is why it's possible to parse them. +# This function also removes comments (anything after '#'). 
+# As a special case, this function will return NULL if the line +# is empty after removing spaces. +def ParseConfigLine(orig_config_line): + # Remove comments. + # note: splitting on '#' will always give at least one field... python + # treats splitting on space as a special case that may give zero fields. + config_line = orig_config_line.split('#')[0] + # Now split on space; later we may splice things back together. + fields=config_line.split() + if len(fields) == 0: + return None # Line was only whitespace after removing comments. + first_token = fields[0] + # if first_token does not look like 'foo-bar' or 'foo-bar2', then die. + if re.match('^[a-z][-a-z0-9]+$', first_token) is None: + raise Exception("Error parsing config line (first field doesn't look right): {0}".format( + orig_config_line)) + # get rid of the first field which we put in 'first_token'. + fields = fields[1:] + + rest_of_line = ' '.join(fields) + + # suppose rest_of_line is: 'input=Append(foo, bar) foo=bar' + # then after the below we'll get + # fields = ['', 'input', 'Append(foo, bar)', 'foo', 'bar'] + fields = re.split(r'\s*([-a-zA-Z0-9_]*)=', rest_of_line) + if not (fields[0] == '' and len(fields) % 2 == 1): + raise Exception("Could not parse config line: " + orig_config_line) + fields = fields[1:] + num_variables = len(fields) / 2 + ans_dict = dict() + for i in range(num_variables): + var_name = fields[i * 2] + var_value = fields[i * 2 + 1] + if re.match(r'[a-zA-Z_]', var_name) is None: + raise Exception("Expected variable name '{0}' to start with alphabetic character or _, " + "in config line {1}".format(var_name, orig_config_line)) + if var_name in ans_dict: + raise Exception("Config line has multiply defined variable {0}: {1}".format( + var_name, orig_config_line)) + ans_dict[var_name] = var_value + return (first_token, ans_dict) + + +# Reads a config file and returns a list of objects, where each object +# represents one line of the file. +def ReadConfigFile(filename): + try: + f = open(filename, "r") + except Exception as e: + raise Exception("Error reading config file {0}: {1}".format( + filename, repr(e))) + ans = [] + prev_names = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(line) + if x is None: + continue # blank line + (first_token, key_to_value) = x + layer_object = ConfigLineToObject(first_token, key_to_value, prev_names) + ans.append(layer_object) + prev_names.append(layer_object.Name()) + +# turns a config line that has been parsed into +# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, +# into an object representing that line of the config file. +# 'prev_names' is a list of the names of preceding lines of the +# config file. 
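+# For example, ('input', {'name':'ivector', 'dim':'100'}) would become an
+# XconfigInputLine object; at this stage the dispatch below is only a stub.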
+def ConfigLineToObject(first_token, key_to_value, prev_names): + pass + + +def TestLibrary(): + TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' + assert TokenizeTest("hi") == ['hi'] + assert TokenizeTest("hi there") == ['hi', 'there'] + assert TokenizeTest("hi,there") == ['hi', ',', 'there'] + assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] + assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] + assert TokenizeTest("[-1]@2") == ['[', '-1', ']', '@', '2' ] + + assert Descriptor('foo').str() == 'foo' + assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' + assert Descriptor('Sum(Offset(foo,1),Offset(foo,0))').str() == 'Sum(Offset(foo, 1), Offset(foo, 0))' + for x in [ 'Append(foo, Sum(bar, Offset(baz, 1)))', 'Failover(foo, Offset(bar, -1))', + 'IfDefined(Round(baz, 3))', 'Switch(foo1, Offset(foo2, 2), Offset(foo3, 3))', + 'IfDefined(ReplaceIndex(ivector, t, 0))', 'ReplaceIndex(foo, x, 0)' ]: + if not Descriptor(x).str() == x: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), x)) + + prev_names = ['last_but_one_layer', 'prev_layer'] + for x, y in [ ('Sum(foo,bar)', 'Sum(foo, bar)'), + ('Sum(foo1,bar-3_4)', 'Sum(foo1, bar-3_4)'), + ('Append(input@-3, input@0, input@3)', + 'Append(Offset(input, -3), input, Offset(input, 3))'), + ('Append(-3,0,3)', + 'Append(Offset(prev_layer, -3), prev_layer, Offset(prev_layer, 3))'), + ('[-1]', 'prev_layer'), + ('[-2]', 'last_but_one_layer'), + ('[-2]@3', + 'Offset(last_but_one_layer, 3)') ]: + if not Descriptor(x, prev_names).str() == y: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) + + print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) + print(ParseConfigLine('affine-layer')) From 1553399a897eb8f2d3555b56d1f5fad658eb5cd1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 27 Oct 2016 00:29:47 -0400 Subject: [PATCH 02/12] Change how [-1] and the like are parsed in xconfig_lib.py --- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 80 ++++++++++++---------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index 437be386c90..d97900969b2 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -127,7 +127,7 @@ def __init__(self, if descriptor_string != None: try: - tokens = TokenizeDescriptor(descriptor_string) + tokens = TokenizeDescriptor(descriptor_string, prev_names) pos = 0 (d, pos) = ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker @@ -175,8 +175,10 @@ def IsValidLineName(name): # parsing the descriptor. # It returns a pair (d, pos) where d is the newly parsed Descriptor, # and 'pos' is the new position after consuming the relevant input. -def ParseNewDescriptor(tokens, pos, - prev_names): +# 'prev_names' is so that we can find the most recent layer name for +# expressions like Append(-3, 0, 3) which is shorthand for the most recent +# layer spliced at those time offsets. +def ParseNewDescriptor(tokens, pos, prev_names): size = len(tokens) first_token = tokens[pos] pos += 1 @@ -189,8 +191,7 @@ def ParseNewDescriptor(tokens, pos, pos += 1 d.operator = first_token # the 1st argument of all these operators is a Descriptor. 
- (desc, pos) = ParseNewDescriptor(tokens, - pos, prev_names) + (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names) d.items = [desc] if first_token == 'Offset': @@ -230,8 +231,7 @@ def ParseNewDescriptor(tokens, pos, else: raise Exception("Parsing Append(), expected ')' or ',', got " + tokens[pos]) - (desc, pos) = ParseNewDescriptor(tokens, - pos, prev_names) + (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names) d.items.append(desc) elif first_token == 'Round': ExpectToken(',', tokens[pos], 'Round()') @@ -269,35 +269,16 @@ def ParseNewDescriptor(tokens, pos, elif first_token in [ 'end of string', '(', ')', ',', '@' ]: raise Exception("Expected descriptor, got " + first_token) elif IsValidLineName(first_token) or first_token == '[': - # This section parses either a raw input/layer/output name, e.g. "affine2" + # This section parses a raw input/layer/output name, e.g. "affine2" # (which must start with an alphabetic character or underscore), - # or something like [-2], optionally followed by an offset like '@-3'. - if first_token == '[': - try: - offset_into_prev_names = int(tokens[pos]) - assert offset_into_prev_names < 0 - pos += 1 - except: - raise Exception("Parse error: after '[', expected negative integer, got '{0}'".format( - tokens[pos])) - ExpectToken(']', tokens[pos], 'Descriptor') - pos += 1 - assert isinstance(prev_names, list) - if -offset_into_prev_names > len(prev_names): - raise Exception("Error: expression [{0}] requested, but there are " - "not enough previous input or layer names to satisfy " - "this.".format(offset_into_prev_names)) - d.operator = None - # below, e.g. prev_names[-2] would give the last-but-one layer. - d.items = [prev_names[offset_into_prev_names]] - else: - # 'first_token' starts with a-z, A-Z or _, treat it - # as the name of a layer or input node. - d.operator = None - d.items = [first_token] + # optionally followed by an offset like '@-3'. - # If the layer-name or expression like [-2] is followed by '@', then - # we're parsing something like 'affine1@-3' or '[-2]@3'. + d.operator = None + d.items = [first_token] + + # If the layer-name o is followed by '@', then + # we're parsing something like 'affine1@-3' which + # is syntactic sugar for 'Offset(affine1, 3)'. if tokens[pos] == '@': pos += 1 try: @@ -346,6 +327,7 @@ def ParseNewDescriptor(tokens, pos, # The argument 'prev_names' (for the names of previous layers and input and # output nodes) is needed to process expressions like [-1] meaning the most # recent layer, or [-2] meaning the last layer but one. +# The default None for prev_names is only supplied for testing purposes. def TokenizeDescriptor(descriptor_string, prev_names = None): # split on '(', ')', ',', '@', and space. @@ -354,9 +336,33 @@ def TokenizeDescriptor(descriptor_string, # why we keep characters like '(' and ')' as tokens. fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) ans = [] - for f in fields: + i = 0 + while i < len(fields): + f = fields[i] + i = i + 1 # don't include fields that are space, or are empty. 
- if re.match(r'^\s*$', f) is None: + if re.match(r'^\s*$', f) is not None: + continue + if f == '[': + if i + 2 >= len(fields): + raise Exception("Error tokenizing string '{0}': '[' found too close " + "to the end of the descriptor.".format(descriptor_string)) + if fields[i+1] != ']': + raise Exception("Error tokenizing string '{0}': expected ']', got '{1}'".format( + descriptor_string, fields[i+1])) + assert isinstance(prev_names, list) + try: + offset = int(fields[i]) + assert offset < 0 and -offset <= len(prev_names) + i += 2 # consume the int and the ']'. + except: + raise Exception("Error tokenizing string '{0}': expression [{1}] has an " + "invalid or out of range offset.".format(descriptor_string, fields[i])) + this_field = prev_names[offset] + assert IsValidLineName(this_field) # should already have been + # checked, so assert. + ans.append(this_field) + else: ans.append(f) ans.append('end of string') @@ -452,7 +458,7 @@ def TestLibrary(): assert TokenizeTest("hi,there") == ['hi', ',', 'there'] assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] - assert TokenizeTest("[-1]@2") == ['[', '-1', ']', '@', '2' ] + assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] assert Descriptor('foo').str() == 'foo' assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' From 6a8ecdbe83b610304fb728d80dbdb908f48ee8a2 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 30 Oct 2016 16:15:18 -0400 Subject: [PATCH 03/12] Adding some temporary work on xconfigs (will not work right now) --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 206 ++++++++++++++++++ egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 123 ++++++++--- 2 files changed, 296 insertions(+), 33 deletions(-) create mode 100644 egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py new file mode 100644 index 00000000000..44541588f7d --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -0,0 +1,206 @@ +from __future__ import print_function +import subprocess +import logging +import math +import re +import sys +import traceback +import time +import argparse +from xconfig_lib import * + +# This class represents a line that starts with 'input', e.g. +# 'input name=ivector dim=100', or 'input name=input dim=40' +class XconfigInputLine: + # Constructor. + # first_token must be the string 'input'. + # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. + # 'prev_names' is a list of the names of preceding lines of the + # config file; it's not used here but is part of the common + # interface for xconfig input line constructors. + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'input' + if not 'name' in key_to_value: + raise Exception("Config line for input does not specify name.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Name '{0}' is not a valid node name.".format(self.name)) + if not 'dim' in key_to_value: + raise Exception("Config line for input does not specify dimension.") + try: + self.dim = int(key_to_value['dim']) + assert self.dim > 0 + if len(key_to_value) > 2: + raise Exception("Unused name=value pairs in config line") + except: + raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) + + + # This returns the name of the layer, e.g. 'input' or 'ivector'. 
+ def Name(): + return self.name + + # This returns the component-node name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.renorm'. + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputName(qualifier = None): + assert qualifier == None + return self.name + + # The dimension that this layer outputs. + # OutputDim(). + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputDim(qualifier = None): + assert qualifier == None + return self.dim + + # Returns a list of all qualifiers (meaning auxiliary outputs) that this + # layer supports (these are either 'None' for the regular output, or a + # string such as 'projection' or something like that, for auxiliary outputs. + def Qualifiers(): + return [ None ] + + # This function writes the 'full' config format, as would be read + # by the C++ programs. It writes the config lines to 'file'. + # 'all_layers' is a vector of objects (of type XConfigInputLine or + # inheriting from XconfigLayerBase), which is used to get + # the component names and + def GetFullConfig(self, file, all_layers): + print("input-node name={0} dim={0}".format(self.name, self.dim) + + def str(self): + return 'input name={0} dim={1}'.format(self.name, self.dim) + + def __str__(self): + return self.str() + + + +# A base-class for classes representing layers of xconfig files (but not input +# nodes). This handles parsing the Descriptors and other common tasks. +class XconfigLayerBase(object): + # Constructor. + # first_token is the first token on the xconfig line, e.g. 'affine-layer'.f + # key_to_value is a dict like: + # { 'name':'affine1', 'input':'Append(0, 1, 2, ReplaceIndex(ivector, t, 0))', 'dim=1024' }. + # The only required and 'special' values that are dealt with directly at this level, are + # 'name' and 'input'. + # The rest are put in self.config and are dealt with by the child classes' init functions. + # prev_names is an array of the names (xxx in 'name=xxx') of previous + # lines of the config file. + def __init__(self, first_token, key_to_value, prev_names = None): + self.layer_type = first_token + if not 'name' in key_to_value + raise Exception("Expected 'name' to be specified.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Invalid value: name={0}".format(key_to_value['name'])) + + if not 'input' in key_to_value + raise Exception("Expected 'name' to be specified.") + input_descriptor_str = key_to_value[input] + tokens = TokenizeDescriptor(input_descriptor_str, prev_names) + pos = 0 + (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + # the following, which should be overridden in the child class, sets + # default config parameters in self.config. + self.SetDefaultConfigs() + self._OverrideConfigs() + # the following, which should be overridden in the child class, checks + # that the config parameters that have been set are reasonable. + self.CheckConfigs() + + + # We broke this code out of __init__ for clarity. + def _OverrideConfigs(key_to_value): + # the child-class constructor will deal with the configuration values + # in a more specific way. 
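+        # Any key other than 'name' and 'input' must already have a default set
+        # by SetDefaultConfigs(); unknown keys are rejected below.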
+ for key,value in key_to_value.items(): + if key != 'name' and key != 'input': + if not key in self.config: + raise Exception("Configuration value {0}={1} was not expected in " + "layer of type {2}".format(key, value, self.layer_type)) + if isinstance(value, bool): + self.config[key] = ConvertValueToType(key, type(self.config[key]), + value) + + def GetDefaultConfigs(): + raise Exception("Child classes must override GetDefaultConfigs().") + + + # child classes may override this but do not have to. + def CheckConfigs(): + pass + + + # Returns a list of all qualifiers (meaning auxiliary outputs) that this + # layer supports (these are either 'None' for the regular output, or a + # string such as 'projection' or something like that, for auxiliary outputs. + # This is a default implementation of the function. + def Qualifiers(): + return [ None ] + + # This returns the component-node name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.renorm'. + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputName(qualifier = None): + raise Exception("Child classes must override OutputName()") + + # The dimension that this layer outputs. + # The 'qualifier' parameter is to support + # types, which support auxiliary outputs. + def OutputDim(qualifier = None): + raise Exception("Child classes must override OutputDim()") + + + # This function writes the 'full' config format, as would be read + # by the C++ programs. It writes the config lines to 'file'. + # 'all_layers' is a vector of objects (of type XConfigInputLine or + # inheriting from XconfigLayerBase), which is used to get + # the component names and dimensions at the input. + def GetFullConfig(self, file, all_layers): + raise Exception("Child classes must override GetFullConfig()") + + # Name() returns the name of this layer, e.g. 'affine1'. It does not + # necessarily correspond to a component name. + def Name(): + return self.name + + def str(self): + ans = '{0} name={1}'.format(self.layer_type, self.name) + ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) + for key in sorted(self.config.keys())]) + return ans + + def __str__(self): + return self.str() + + + +# Uses ParseConfigLine() to turn a config line that has been parsed into +# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, +# and then turns this into an object representing that line of the config file. +# 'prev_names' is a list of the names of preceding lines of the +# config file. +def ConfigLineToObject(config_line, prev_names = None): + (first_token, key_to_value) = ParseConfigLine(config_line) + + if first_token == 'input': + return XconfigInputLine(key_to_value) + + +def TestLayers(): + # for some config lines that should be printed the same way as they + # are read, check that this is the case. 
+ for x in [ 'input name=input dim=30' ]: + assert str(ConfigLineToObject(x, [])) == x diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index d97900969b2..f38d28c72a6 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -39,12 +39,42 @@ def __call__(self, parser, namespace, values, option_string=None): else: setattr(namespace, self.dest, values) +# This function, used in converting string values in config lines to +# configuration values in self.config in layers, attempts to +# convert 'string_value' to an instance dest_type (which is of type Type) +# 'key' is only needed for printing errors. +class ConvertValueToType(key, dest_type, string_value): + if dest_type == type(bool()): + if string_value == "True" or string_value == "true": + return True + elif string_value == "False" or string_value == "false": + return False + else: + raise Exception("Invalid configuration value {0}={1} (expected bool)".format( + key, string_value)) + elif dest_type == type(int()): + try: + return int(string_value) + except: + raise Exception("Invalid configuration value {0}={1} (expected int)".format( + key, string_value) + elif dest_type == type(float()): + try: + return float(string_value) + except: + raise Exception("Invalid configuration value {0}={1} (expected int)".format( + key, string_value) + elif dest_type == type(str()): + return sting_value + # This class represents a line that starts with 'input', e.g. # 'input name=ivector dim=100', or 'input name=input dim=40' class XconfigInputLine: # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. - def __init__(self, key_to_value): + # prev_layer_names is not used here but other constructors for lines + # use it, so we must too. + def __init__(self, key_to_value, prev_layer_names = None): if not 'name' in key_to_value: raise Exception("Config line for input does not specify name.") self.name = key_to_value['name'] @@ -73,6 +103,13 @@ def OutputName(): def OutputDim(): return self.dim + def str(self): + return 'input name={0} dim={1}'.format(self.name, self.dim) + + def __str__(self): + return self.str() + + # A base-class for classes representing lines of xconfig files. # This handles the @@ -319,37 +356,29 @@ def ParseNewDescriptor(tokens, pos, prev_names): return (d, pos) - - -# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. -# Note: for convenience in parsing, we add the token 'end-of-string' to this -# list. -# The argument 'prev_names' (for the names of previous layers and input and -# output nodes) is needed to process expressions like [-1] meaning the most -# recent layer, or [-2] meaning the last layer but one. -# The default None for prev_names is only supplied for testing purposes. -def TokenizeDescriptor(descriptor_string, - prev_names = None): - # split on '(', ')', ',', '@', and space. - # Note: the parenthesis () in the regexp causes it to output - # the stuff inside the () as if it were a field, which is - # why we keep characters like '(' and ')' as tokens. - fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) - ans = [] +# This function takes a string 'descriptor_string' which might +# look like 'Append([-1], [-2], input)', and a list of previous layer +# names like prev_names = ['foo', 'bar', 'baz'], and replaces +# the integers in brackets with the previous layers. 
-1 means +# the most recent previous layer ('baz' in this case), -2 +# means the last layer but one ('bar' in this case), and so on. +# It will throw an exception if the number is out of range. +# If there are no such expressions in the string, it's OK if +# prev_names == None (this is useful for testing). +def ReplaceBracketExpressionsInDescriptor(descriptor_string, + prev_names = None): + fields = re.split(r'(\[|\])\s*', descriptor_string) + out_fields = [] i = 0 while i < len(fields): f = fields[i] - i = i + 1 - # don't include fields that are space, or are empty. - if re.match(r'^\s*$', f) is not None: - continue - if f == '[': + i += 1 + if f == ']': + raise Exception("Unmatched ']' in descriptor") + elif f == '[': if i + 2 >= len(fields): raise Exception("Error tokenizing string '{0}': '[' found too close " "to the end of the descriptor.".format(descriptor_string)) - if fields[i+1] != ']': - raise Exception("Error tokenizing string '{0}': expected ']', got '{1}'".format( - descriptor_string, fields[i+1])) assert isinstance(prev_names, list) try: offset = int(fields[i]) @@ -359,10 +388,33 @@ def TokenizeDescriptor(descriptor_string, raise Exception("Error tokenizing string '{0}': expression [{1}] has an " "invalid or out of range offset.".format(descriptor_string, fields[i])) this_field = prev_names[offset] - assert IsValidLineName(this_field) # should already have been - # checked, so assert. - ans.append(this_field) + out_fields.append(this_field) else: + out_fields.append(f) + return ''.join(out_fields) + + + +# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. +# Note: for convenience in parsing, we add the token 'end-of-string' to this +# list. +# The argument 'prev_names' (for the names of previous layers and input and +# output nodes) is needed to process expressions like [-1] meaning the most +# recent layer, or [-2] meaning the last layer but one. +# The default None for prev_names is only supplied for testing purposes. +def TokenizeDescriptor(descriptor_string, + prev_names = None): + # split on '(', ')', ',', '@', and space. Note: the parenthesis () in the + # regexp causes it to output the stuff inside the () as if it were a field, + # which is how the call to re.split() keeps characters like '(' and ')' as + # tokens. + fields = re.split(r'(\(|\)|@|,|\s)\s*', + ReplaceBracketExpressionsInDescriptor(descriptor_string, + prev_names)) + ans = [] + for f in fields: + # don't include fields that are space, or are empty. + if re.match(r'^\s*$', f) is None: ans.append(f) ans.append('end of string') @@ -442,13 +494,16 @@ def ReadConfigFile(filename): ans.append(layer_object) prev_names.append(layer_object.Name()) -# turns a config line that has been parsed into +# Uses ParseConfigLine() to turn a config line that has been parsed into # a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, -# into an object representing that line of the config file. +# and then turns this into an object representing that line of the config file. # 'prev_names' is a list of the names of preceding lines of the # config file. 
-def ConfigLineToObject(first_token, key_to_value, prev_names): - pass +def ConfigLineToObject(config_line, prev_names = None): + (first_token, key_to_value) = ParseConfigLine(config_line) + + if first_token == 'input': + return XconfigInputLine(key_to_value) def TestLibrary(): @@ -459,6 +514,7 @@ def TestLibrary(): assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] + assert TokenizeDescriptor("[-2].special@2", ['foo', 'bar'])[:-1] == ['foo.special', '@', '2' ] assert Descriptor('foo').str() == 'foo' assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' @@ -483,6 +539,7 @@ def TestLibrary(): if not Descriptor(x, prev_names).str() == y: print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) From 9adc26ce2a8f3ff3f681e8ea5a5ecae7e09a545b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 1 Nov 2016 23:26:44 -0400 Subject: [PATCH 04/12] Some partial work --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 337 +++++++++----- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 127 ++---- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 421 ++++++++++++++++++ 3 files changed, 682 insertions(+), 203 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet3/xconfig_to_configs.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 44541588f7d..97f8c4846b6 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -9,78 +9,94 @@ import argparse from xconfig_lib import * -# This class represents a line that starts with 'input', e.g. -# 'input name=ivector dim=100', or 'input name=input dim=40' -class XconfigInputLine: - # Constructor. - # first_token must be the string 'input'. - # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. - # 'prev_names' is a list of the names of preceding lines of the - # config file; it's not used here but is part of the common - # interface for xconfig input line constructors. - def __init__(self, first_token, key_to_value, prev_names = None): - assert first_token == 'input' - if not 'name' in key_to_value: - raise Exception("Config line for input does not specify name.") - self.name = key_to_value['name'] - if not IsValidLineName(self.name): - raise Exception("Name '{0}' is not a valid node name.".format(self.name)) - if not 'dim' in key_to_value: - raise Exception("Config line for input does not specify dimension.") - try: - self.dim = int(key_to_value['dim']) - assert self.dim > 0 - if len(key_to_value) > 2: - raise Exception("Unused name=value pairs in config line") - except: - raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) - - - # This returns the name of the layer, e.g. 'input' or 'ivector'. - def Name(): - return self.name - - # This returns the component-node name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.renorm'. - # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. - def OutputName(qualifier = None): - assert qualifier == None - return self.name - - # The dimension that this layer outputs. - # OutputDim(). 
- # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. - def OutputDim(qualifier = None): - assert qualifier == None - return self.dim - - # Returns a list of all qualifiers (meaning auxiliary outputs) that this - # layer supports (these are either 'None' for the regular output, or a - # string such as 'projection' or something like that, for auxiliary outputs. - def Qualifiers(): - return [ None ] - - # This function writes the 'full' config format, as would be read - # by the C++ programs. It writes the config lines to 'file'. - # 'all_layers' is a vector of objects (of type XConfigInputLine or - # inheriting from XconfigLayerBase), which is used to get - # the component names and - def GetFullConfig(self, file, all_layers): - print("input-node name={0} dim={0}".format(self.name, self.dim) - - def str(self): - return 'input name={0} dim={1}'.format(self.name, self.dim) - - def __str__(self): - return self.str() - - - -# A base-class for classes representing layers of xconfig files (but not input -# nodes). This handles parsing the Descriptors and other common tasks. +# Given a list of objects of type XconfigLayerBase ('all_layers'), +# including at least the layers preceding 'current_layer' (and maybe +# more layers), return the names of layers preceding 'current_layer' +# This will be used in parsing expressions like [-1] in descriptors +# (which is an alias for the previous layer). +def GetPrevNames(all_layers, current_layer): + assert current_layer in all_layers + prev_names = [] + for layer in all_layers: + if layer is current_layer: + break + prev_names.append(layer.Name()) + return prev_names + +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetDimFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise Exception("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputDim(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise Exception("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise Exception("No such layer: '{0}'".format(layer_name)) + + +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple +# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 
'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetStringFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise Exception("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputName(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise Exception("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise Exception("No such layer: '{0}'".format(layer_name)) + + + +# A base-class for classes representing layers of xconfig files. +# This mainly just sets self.layer_type, self.name and self.config, class XconfigLayerBase(object): # Constructor. # first_token is the first token on the xconfig line, e.g. 'affine-layer'.f @@ -91,36 +107,28 @@ class XconfigLayerBase(object): # The rest are put in self.config and are dealt with by the child classes' init functions. # prev_names is an array of the names (xxx in 'name=xxx') of previous # lines of the config file. + def __init__(self, first_token, key_to_value, prev_names = None): self.layer_type = first_token - if not 'name' in key_to_value + if not 'name' in key_to_value: raise Exception("Expected 'name' to be specified.") self.name = key_to_value['name'] if not IsValidLineName(self.name): raise Exception("Invalid value: name={0}".format(key_to_value['name'])) - if not 'input' in key_to_value - raise Exception("Expected 'name' to be specified.") - input_descriptor_str = key_to_value[input] - tokens = TokenizeDescriptor(input_descriptor_str, prev_names) - pos = 0 - (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) - # note: 'pos' should point to the 'end of string' marker - # that terminates 'tokens'. - if pos != len(tokens) - 1: - raise Exception("Parsing Descriptor, saw junk at end: " + - ' '.join(tokens[pos:-1])) # the following, which should be overridden in the child class, sets # default config parameters in self.config. self.SetDefaultConfigs() - self._OverrideConfigs() + # The following is not to be reimplemented in child classes; + # sets the config files to those specified by the user. + self._SetConfigs(key_to_value) # the following, which should be overridden in the child class, checks # that the config parameters that have been set are reasonable. self.CheckConfigs() # We broke this code out of __init__ for clarity. - def _OverrideConfigs(key_to_value): + def _SetConfigs(self, key_to_value): # the child-class constructor will deal with the configuration values # in a more specific way. 
for key,value in key_to_value.items(): @@ -128,63 +136,146 @@ def _OverrideConfigs(key_to_value): if not key in self.config: raise Exception("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) - if isinstance(value, bool): - self.config[key] = ConvertValueToType(key, type(self.config[key]), - value) + self.config[key] = ConvertValueToType(key, type(self.config[key]), value) - def GetDefaultConfigs(): - raise Exception("Child classes must override GetDefaultConfigs().") + # This function converts 'this' to a string which could be printed to an + # xconfig file; in xconfig_to_configs.py we actually expand all the lines to + # strings and write it as xconfig.expanded as a reference (so users can + # see any defaults). + def str(self): + ans = '{0} name={1}'.format(self.layer_type, self.name) + ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) + for key in sorted(self.config.keys())]) + return ans - # child classes may override this but do not have to. + def __str__(self): + return self.str() + + # This function, which is a convenience function intended to be called from + # child classes, converts a string representing a descriptor + # ('descriptor_string') into an object of type Descriptor, and returns it. + # It needs 'self' and 'all_layers' (where 'all_layers' is a list of objects + # of type XconfigLayerBase) so that it can work out a list of the names of + # other layers, and get dimensions from them. + def ConvertToDescriptor(self, descriptor_string, all_layers): + prev_names = GetPrevNames(all_layers, self) + tokens = TokenizeDescriptor(descriptor_string, prev_names) + pos = 0 + (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + + # Returns the dimension of a Descriptor object. + # This is a convenience function provided for use in child classes; + def GetDimForDescriptor(self, descriptor, all_layers): + layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) + return descriptor.Dim(layer_to_dim_func) + + # Returns the 'final' string form of a Descriptor object, as could be used + # in config files. + # This is a convenience function provided for use in child classes; + def GetStringForDescriptor(self, descriptor, all_layers): + layer_to_string_func = lambda name: GetStringFromLayerName(all_layers, self, name) + return descriptor.ConfigString(layer_to_string_func) + + # Name() returns the name of this layer, e.g. 'affine1'. It does not + # necessarily correspond to a component name. + def Name(): + return self.name + + ###### Functions that should be overridden by the child class: ##### + + # child classes should override this. + def SetDefaultConfigs(): + raise Exception("Child classes must override SetDefaultConfigs().") + + # child classes should override this. def CheckConfigs(): pass - # Returns a list of all qualifiers (meaning auxiliary outputs) that this - # layer supports (these are either 'None' for the regular output, or a - # string such as 'projection' or something like that, for auxiliary outputs. - # This is a default implementation of the function. + # layer supports. These are either 'None' for the regular output, or a + # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that + # the layer might provide. 
Most layer types will not need to override this. def Qualifiers(): return [ None ] - # This returns the component-node name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.renorm'. - # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. + # Called with qualifier == None, this returns the component-node name of the + # principal output of the layer (or if you prefer, the text form of a + # descriptor that gives you such an output; such as Append(some_node, + # some_other_node)). + # The 'qualifier' argument is a text value that is designed for extensions + # to layers that have additional auxiliary outputs. For example, to implement + # a highway LSTM you need the memory-cell of a layer, so you might allow + # qualifier='memory_cell' for such a layer type, and it would return the + # component node or a suitable Descriptor: something like 'lstm3.c_t' def OutputName(qualifier = None): raise Exception("Child classes must override OutputName()") - # The dimension that this layer outputs. - # The 'qualifier' parameter is to support - # types, which support auxiliary outputs. + # The dimension that this layer outputs. The 'qualifier' parameter is for + # layer types which support auxiliary outputs. def OutputDim(qualifier = None): raise Exception("Child classes must override OutputDim()") - - # This function writes the 'full' config format, as would be read - # by the C++ programs. It writes the config lines to 'file'. - # 'all_layers' is a vector of objects (of type XConfigInputLine or - # inheriting from XconfigLayerBase), which is used to get - # the component names and dimensions at the input. - def GetFullConfig(self, file, all_layers): + # This function returns lines destined for the 'full' config format, as + # would be read by the C++ programs. + # Since the program xconfig_to_configs.py writes several config files, this + # function returns a list of pairs of the form (config_file_basename, line), + # e.g. something like + # [ ('init', 'input-node name=input dim=40'), + # ('ref', 'input-node name=input dim=40') ] + # which would be written to config_dir/init.config and config_dir/ref.config. + # + # 'all_layers' is a vector of objects inheriting from XconfigLayerBase, + # which is used to get the component names and dimensions at the input. + def GetFullConfig(self, all_layers): raise Exception("Child classes must override GetFullConfig()") - # Name() returns the name of this layer, e.g. 'affine1'. It does not - # necessarily correspond to a component name. - def Name(): - return self.name - def str(self): - ans = '{0} name={1}'.format(self.layer_type, self.name) - ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) - for key in sorted(self.config.keys())]) - return ans +# This class is for lines like +# 'input name=input dim=40' +# or +# 'input name=ivector dim=100' +# in the config file. 
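+# Its main job is to emit the corresponding 'input-node name=... dim=...' lines
+# into the generated config files via GetFullConfig().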
+class XconfigInputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'input' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) - def __str__(self): - return self.str() + def SetDefaultConfigs(self): + self.config = { 'dim':-1 } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise Exception("Dimension of input-layer '{0}' is not set".format(self.name)) + + def OutputName(qualifier = None): + assert qualifier is None + return self.name + + def OutputDim(qualifier = None): + assert qualifier is None + return self.config['dim'] + + def GetFullConfig(self, all_layers): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + for config_name in [ 'init', 'ref', 'full' ]: + ans.append( (config_name, + 'input-node name={0} dim={1}'.format(self.name, + self.config['dim']))) + return ans # Uses ParseConfigLine() to turn a config line that has been parsed into @@ -196,7 +287,7 @@ def ConfigLineToObject(config_line, prev_names = None): (first_token, key_to_value) = ParseConfigLine(config_line) if first_token == 'input': - return XconfigInputLine(key_to_value) + return XconfigInputLayer(first_token, key_to_value) def TestLayers(): diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index f38d28c72a6..17a25f9fbb4 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -8,14 +8,6 @@ import time import argparse -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -handler.setLevel(logging.INFO) -formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)s - %(funcName)s - %(levelname)s ] %(message)s') -handler.setFormatter(formatter) -logger.addHandler(handler) - class StrToBoolAction(argparse.Action): """ A custom action to convert bools from shell format i.e., true/false @@ -43,7 +35,7 @@ def __call__(self, parser, namespace, values, option_string=None): # configuration values in self.config in layers, attempts to # convert 'string_value' to an instance dest_type (which is of type Type) # 'key' is only needed for printing errors. -class ConvertValueToType(key, dest_type, string_value): +def ConvertValueToType(key, dest_type, string_value): if dest_type == type(bool()): if string_value == "True" or string_value == "true": return True @@ -57,75 +49,17 @@ class ConvertValueToType(key, dest_type, string_value): return int(string_value) except: raise Exception("Invalid configuration value {0}={1} (expected int)".format( - key, string_value) + key, string_value)) elif dest_type == type(float()): try: return float(string_value) except: raise Exception("Invalid configuration value {0}={1} (expected int)".format( - key, string_value) + key, string_value)) elif dest_type == type(str()): return sting_value -# This class represents a line that starts with 'input', e.g. -# 'input name=ivector dim=100', or 'input name=input dim=40' -class XconfigInputLine: - # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. 
- # prev_layer_names is not used here but other constructors for lines - # use it, so we must too. - def __init__(self, key_to_value, prev_layer_names = None): - if not 'name' in key_to_value: - raise Exception("Config line for input does not specify name.") - self.name = key_to_value['name'] - if not IsValidLineName(self.name): - raise Exception("Name '{0}' is not a valid node name.".format(self.name)) - if not 'dim' in key_to_value: - raise Exception("Config line for input does not specify dimension.") - try: - self.dim = int(key_to_value['dim']) - assert self.dim > 0 - except: - raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) - - # This returns the name of the layer. - def Name(): - return self.name - - # This returns the name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.relu'. - def OutputName(): - return self.name - - # note: layers have a function InputDim() also, so we call this dimension function - # OutputDim(). - def OutputDim(): - return self.dim - - def str(self): - return 'input name={0} dim={1}'.format(self.name, self.dim) - - def __str__(self): - return self.str() - - - -# A base-class for classes representing lines of xconfig files. -# This handles the -class XconfigLineBase: - def __init__(self): - pass - - def Name(): - return self.name - - def SetDims(): - raise Exception("SetDims() not implemented for this class") - - - - # This class parses and stores a Descriptor-- expression # like Append(Offset(input, -3), input) and so on. @@ -180,6 +114,19 @@ def __init__(self, raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format( descriptor_string, repr(e))) + # This is like the str() function, but it uses the layer_to_string function + # (which is a function from strings to strings) to convert layer names (or + # in general sub-layer names of the form 'foo.bar') to the component-node + # (or, in general, descriptor) names that appear in the final config file. + # This mechanism gives those designing layer types the freedom to name their + # nodes as they want. + def ConfigString(self, layer_to_string): + if self.operator is None: + assert len(self.items) == 1 and isinstance(self.items[0], str) + return layer_to_node(self.items[0]) + else: + assert isinstance(self.operator, str) + return self.operator + '(' + ', '.join([OutputString(item, layer_to_node) for item in self.items]) + ')' def str(self): if self.operator is None: @@ -192,6 +139,37 @@ def str(self): def __str__(self): return self.str() + # This function returns the dimension (i.e. the feature dimension) of the + # descriptor. It takes 'layer_to_dim' which is a function from + # layer-names (including sub-layer names, like lstm1.memory_cell) to + # dimensions, e.g. you might have layer_to_dim('ivector') = 100, or + # layer_to_dim('affine1') = 1024. + # note: layer_to_dim will raise an exception if a nonexistent layer or + # sub-layer is requested. + def Dim(self, layer_to_dim): + if self.operator is None: + # base-case: self.items = [ layer_name ] (or sub-layer name, like + # 'lstm.memory_cell'). + return layer_to_dim(self.items[0]) + elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]: + # these are all operators for which all args are descriptors + # and must have the same dim. 
+ dim = self.items[0].Dim() + for desc in self.items[1:]: + next_dim = desc.Dim() + if next_dim != dim: + raise Exception("In descriptor {0}, different fields have different " + "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) + return dim + elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]: + # for these operators, only the 1st arg is relevant. + return self.items[0].Dim() + elif self.operator == 'Append': + return sum([ x.Dim() for x in self.items]) + else: + raise Exception("Unknown operator {0}".format(self.operator)) + + # This just checks that seen_item == expected_item, and raises an # exception if not. @@ -494,17 +472,6 @@ def ReadConfigFile(filename): ans.append(layer_object) prev_names.append(layer_object.Name()) -# Uses ParseConfigLine() to turn a config line that has been parsed into -# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, -# and then turns this into an object representing that line of the config file. -# 'prev_names' is a list of the names of preceding lines of the -# config file. -def ConfigLineToObject(config_line, prev_names = None): - (first_token, key_to_value) = ParseConfigLine(config_line) - - if first_token == 'input': - return XconfigInputLine(key_to_value) - def TestLibrary(): TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py new file mode 100755 index 00000000000..316a4bb4cb3 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import os +import argparse +import shlex +import sys +import warnings +import copy +import imp +import ast + + +sys.path.insert(0, 'steps/nnet3/libs/') +from xconfig_lib import * +from xconfig_layers import * + + +def GetArgs(): + # we add compulsary arguments as named arguments for readability + parser = argparse.ArgumentParser(description="Reads an xconfig file and creates config files " + "for neural net creation and training", + epilog="Search egs/*/*/local/nnet3/*sh for examples") + + parser.add_argument("--self-repair-scale-nonlinearity", type=float, + help="A non-zero value activates the self-repair mechanism in " + "nonlinearities (larger -> faster self-repair)", default=1.0e-05) + parser.add_argument("xconfig_file", + help="Filename of input xconfig file") + parser.add_argument("config_dir", + help="Directory to write config files and variables") + + print(' '.join(sys.argv)) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if not os.path.exists(args.config_dir): + os.makedirs(args.config_dir) + if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: + sys.exit("{0}: invalid option --self-repair-scale-nonlinearity={1}".format( + sys.argv[0], args.self_repair_scale_nonlinearity)) + + return args + +def AddConvMaxpLayer(config_lines, name, input, args): + if '3d-dim' not in input: + raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.") + + input = nodes.AddConvolutionLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.filt_x_dim, args.filt_y_dim, + args.filt_x_step, args.filt_y_step, + args.num_filters, input['vectorization']) + + if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: + 
input = nodes.AddMaxpoolingLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.pool_x_size, args.pool_y_size, args.pool_z_size, + args.pool_x_step, args.pool_y_step, args.pool_z_step) + + return input + +# The ivectors are processed through an affine layer parallel to the CNN layers, +# then concatenated with the CNN output and passed to the deeper part of the network. +def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): + cnn_args = ParseCnnString(cnn_layer) + num_cnn_layers = len(cnn_args) + # We use an Idct layer here to convert MFCC to FBANK features + nnet3_train_lib.WriteIdctMatrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") + prev_layer_output = {'descriptor': "input", + 'dimension': feat_dim} + prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') + + list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] + splice_descriptor = "Append({0})".format(", ".join(list)) + cnn_input_dim = len(splice_indexes) * feat_dim + prev_layer_output = {'descriptor': splice_descriptor, + 'dimension': cnn_input_dim, + '3d-dim': [len(splice_indexes), feat_dim, 1], + 'vectorization': 'yzx'} + + for cl in range(0, num_cnn_layers): + prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) + + if cnn_bottleneck_dim > 0: + prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") + + if ivector_dim > 0: + iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', + 'dimension': ivector_dim} + iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") + prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) + prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] + + return prev_layer_output + +def PrintConfig(file_name, config_lines): + f = open(file_name, 'w') + f.write("\n".join(config_lines['components'])+"\n") + f.write("\n#Component nodes\n") + f.write("\n".join(config_lines['component-nodes'])+"\n") + f.close() + +def ParseCnnString(cnn_param_string_list): + cnn_parser = argparse.ArgumentParser(description="cnn argument parser") + + cnn_parser.add_argument("--filt-x-dim", required=True, type=int) + cnn_parser.add_argument("--filt-y-dim", required=True, type=int) + cnn_parser.add_argument("--filt-x-step", type=int, default = 1) + cnn_parser.add_argument("--filt-y-step", type=int, default = 1) + cnn_parser.add_argument("--num-filters", required=True, type=int) + cnn_parser.add_argument("--pool-x-size", type=int, default = 1) + cnn_parser.add_argument("--pool-y-size", type=int, default = 1) + cnn_parser.add_argument("--pool-z-size", type=int, default = 1) + cnn_parser.add_argument("--pool-x-step", type=int, default = 1) + cnn_parser.add_argument("--pool-y-step", type=int, default = 1) + cnn_parser.add_argument("--pool-z-step", type=int, default = 1) + + cnn_args = [] + for cl in range(0, len(cnn_param_string_list)): + cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl]))) + + return cnn_args + +def ParseSpliceString(splice_indexes): + splice_array = [] + left_context = 0 + right_context = 0 + split1 = splice_indexes.split(); # 
we already checked the string is nonempty. + if len(split1) < 1: + raise Exception("invalid splice-indexes argument, too short: " + + splice_indexes) + try: + for string in split1: + split2 = string.split(",") + if len(split2) < 1: + raise Exception("invalid splice-indexes argument, too-short element: " + + splice_indexes) + int_list = [] + for int_str in split2: + int_list.append(int(int_str)) + if not int_list == sorted(int_list): + raise Exception("elements of splice-indexes must be sorted: " + + splice_indexes) + left_context += -int_list[0] + right_context += int_list[-1] + splice_array.append(int_list) + except ValueError as e: + raise Exception("invalid splice-indexes argument " + splice_indexes + str(e)) + left_context = max(0, left_context) + right_context = max(0, right_context) + + return {'left_context':left_context, + 'right_context':right_context, + 'splice_indexes':splice_array, + 'num_hidden_layers':len(splice_array) + } + +# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. +def MakeConfigs(config_dir, splice_indexes_string, + cnn_layer, cnn_bottleneck_dim, cepstral_lifter, + feat_dim, ivector_dim, num_targets, add_lda, + nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, + nonlin_output_dim_init, nonlin_output_dim_final, + use_presoftmax_prior_scale, + final_layer_normalize_target, + include_log_softmax, + add_final_sigmoid, + xent_regularize, + xent_separate_forward_affine, + self_repair_scale, + objective_type): + + parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) + + left_context = parsed_splice_output['left_context'] + right_context = parsed_splice_output['right_context'] + num_hidden_layers = parsed_splice_output['num_hidden_layers'] + splice_indexes = parsed_splice_output['splice_indexes'] + input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim + + if xent_separate_forward_affine: + if splice_indexes[-1] != [0]: + raise Exception("--xent-separate-forward-affine option is supported only if the last-hidden layer has no splicing before it. Please use a splice-indexes with just 0 as the final splicing config.") + + prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) + + config_lines = {'components':[], 'component-nodes':[]} + + config_files={} + prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], ivector_dim) + + # Add the init config lines for estimating the preconditioning matrices + init_config_lines = copy.deepcopy(config_lines) + init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') + init_config_lines['components'].insert(0, '# preconditioning matrix computation') + nodes.AddOutputLayer(init_config_lines, prev_layer_output) + config_files[config_dir + '/init.config'] = init_config_lines + + if cnn_layer is not None: + prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, + feat_dim, splice_indexes[0], ivector_dim) + + if add_lda: + prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') + + left_context = 0 + right_context = 0 + # we moved the first splice layer to before the LDA.. 
+ # so the input to the first affine layer is going to [0] index + splice_indexes[0] = [0] + + if not nonlin_output_dim is None: + nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers + elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: + raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") + else: + # computes relu-dim for each hidden layer. They increase geometrically across layers + factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 + nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] + assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error + nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified + + for i in range(0, num_hidden_layers): + # make the intermediate config file for layerwise discriminative training + + # prepare the spliced input + if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): + try: + zero_index = splice_indexes[i].index(0) + except ValueError: + zero_index = None + # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor + prev_layer_output_descriptor = prev_layer_output['descriptor'] + subset_output = prev_layer_output + if subset_dim > 0: + # if subset_dim is specified the script expects a zero in the splice indexes + assert(zero_index is not None) + subset_node_config = "dim-range-node name=Tdnn_input_{0} input-node={1} dim-offset={2} dim={3}".format(i, prev_layer_output_descriptor, 0, subset_dim) + subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), + 'dimension' : subset_dim} + config_lines['component-nodes'].append(subset_node_config) + appended_descriptors = [] + appended_dimension = 0 + for j in range(len(splice_indexes[i])): + if j == zero_index: + appended_descriptors.append(prev_layer_output['descriptor']) + appended_dimension += prev_layer_output['dimension'] + continue + appended_descriptors.append('Offset({0}, {1})'.format(subset_output['descriptor'], splice_indexes[i][j])) + appended_dimension += subset_output['dimension'] + prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), + 'dimension' : appended_dimension} + else: + # this is a normal affine node + pass + + if xent_separate_forward_affine and i == num_hidden_layers - 1: + if xent_regularize == 0.0: + raise Exception("xent-separate-forward-affine=True is valid only if xent-regularize is non-zero") + + if nonlin_type == "relu" : + prev_layer_output_chain = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_chain", + prev_layer_output, nonlin_output_dim, + self_repair_scale = self_repair_scale, + norm_target_rms = final_layer_normalize_target) + + prev_layer_output_xent = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_xent", + prev_layer_output, nonlin_output_dim, + self_repair_scale = self_repair_scale, + norm_target_rms = final_layer_normalize_target) + elif nonlin_type == "pnorm" : + prev_layer_output_chain = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_chain", + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target) + + prev_layer_output_xent = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_xent", + prev_layer_output, 
nonlin_input_dim, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + + nodes.AddFinalLayer(config_lines, prev_layer_output_chain, num_targets, + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = include_log_softmax) + + nodes.AddFinalLayer(config_lines, prev_layer_output_xent, num_targets, + ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( + 0.5 / xent_regularize), + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = True, + name_affix = 'xent') + else: + if nonlin_type == "relu": + prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_output_dims[i], + self_repair_scale = self_repair_scale, + norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + elif nonlin_type == "pnorm": + prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + # a final layer is added after each new layer as we are generating + # configs for layer-wise discriminative training + + # add_final_sigmoid adds a sigmoid as a final layer as alternative + # to log-softmax layer. + # http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression#Softmax_Regression_vs._k_Binary_Classifiers + # This is useful when you need the final outputs to be probabilities between 0 and 1. + # Usually used with an objective-type such as "quadratic". + # Applications are k-binary classification such Ideal Ratio Mask prediction. 
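# Editor's numeric illustration (made-up values) of the distinction described
# above: for raw outputs [1.0, 2.0], a softmax yields roughly [0.27, 0.73]
# (one distribution over targets, summing to 1), whereas elementwise sigmoids
# yield roughly [0.73, 0.88] (each output an independent probability in [0,1]),
# which is what per-target mask prediction with a quadratic objective needs.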
+ nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = include_log_softmax, + add_final_sigmoid = add_final_sigmoid, + objective_type = objective_type) + if xent_regularize != 0.0: + nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, + ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( + 0.5 / xent_regularize), + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = True, + name_affix = 'xent') + + config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines + config_lines = {'components':[], 'component-nodes':[]} + + left_context += int(parsed_splice_output['left_context']) + right_context += int(parsed_splice_output['right_context']) + + # write the files used by other scripts like steps/nnet3/get_egs.sh + f = open(config_dir + "/vars", "w") + print('model_left_context=' + str(left_context), file=f) + print('model_right_context=' + str(right_context), file=f) + print('num_hidden_layers=' + str(num_hidden_layers), file=f) + print('num_targets=' + str(num_targets), file=f) + print('add_lda=' + ('true' if add_lda else 'false'), file=f) + print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) + print('objective_type=' + objective_type, file=f) + f.close() + + # printing out the configs + # init.config used to train lda-mllt train + for key in config_files.keys(): + PrintConfig(key, config_files[key]) + + +def BackUpXconfigFile(): + # we write a copy of the xconfig file just to have a record of the original + # input. + try: + xconfig_file_out = open(args.config_dir + "/xconfig") + except: + sys.exit("{0}: error opening file {1}/xconfig for output".format( + sys.argv[0], args.config_dir)) + try: + xconfig_file_in = open(args.xconfig_file) + except: + sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], args.config_dir)) + + print("# This file was copied from {0} by {1}. 
It is the source\n" + "# from which the config files in this directory were generated.\n" + "# Full command line was:\n" + "# {2}".format(args.xconfig_file, sys.argv[0], ' '.join(sys.argv)), + file=xconfig_file_out) + + while True: + line = xconfig_file_in.readline() + if line == '': + break + print(line.strip(), file=xconfig_file_out) + xconfig_file_out.close() + xconfig_file_in.close() + + +def Main(): + args = GetArgs() + + BackUpXconfigFile() + + + try: + f = + shutil.copyfile(args.xconfig_file, args.xconfig_dir + + MakeConfigs(config_dir = args.config_dir, + splice_indexes_string = args.splice_indexes, + feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, + num_targets = args.num_targets, + add_lda = args.add_lda, + cnn_layer = args.cnn_layer, + cnn_bottleneck_dim = args.cnn_bottleneck_dim, + cepstral_lifter = args.cepstral_lifter, + nonlin_type = args.nonlin_type, + nonlin_input_dim = args.nonlin_input_dim, + nonlin_output_dim = args.nonlin_output_dim, + subset_dim = args.subset_dim, + nonlin_output_dim_init = args.nonlin_output_dim_init, + nonlin_output_dim_final = args.nonlin_output_dim_final, + use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, + final_layer_normalize_target = args.final_layer_normalize_target, + include_log_softmax = args.include_log_softmax, + add_final_sigmoid = args.add_final_sigmoid, + xent_regularize = args.xent_regularize, + xent_separate_forward_affine = args.xent_separate_forward_affine, + self_repair_scale = args.self_repair_scale_nonlinearity, + objective_type = args.objective_type) + +if __name__ == "__main__": + Main() From e96df73c9a1f741702ceba3e70a48391ebbb651f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 2 Nov 2016 20:25:28 -0400 Subject: [PATCH 05/12] some partial changes --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 52 +++++++++- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 2 +- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 99 ++++++++++++++++--- 3 files changed, 139 insertions(+), 14 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 97f8c4846b6..e3d50115b03 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -278,6 +278,21 @@ def GetFullConfig(self, all_layers): return ans +# Converts a line as parsed by ParseConfigLine() into a first +# token e.g. 'input-layer' and a key->value map, into +# an objet inherited from XconfigLayerBase. +# 'prev_names' is a list of previous layer names, it's needed +# to parse things like '[-1]' (meaning: the previous layer) +# when they appear in Desriptors. +def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): + if first_token == 'input': + return XconfigInputLayer(first_token, key_to_value, prev_names) + else: + raise Exception("Error parsing xconfig line (no such layer type): " + + first_token + ' ' + + ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) + + # Uses ParseConfigLine() to turn a config line that has been parsed into # a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, # and then turns this into an object representing that line of the config file. @@ -285,9 +300,42 @@ def GetFullConfig(self, all_layers): # config file. 
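# Editor's toy sketch, for illustration only; it is NOT the library's
# ParseConfigLine() (which also strips comments and tolerates spaces inside
# values). It just shows the (first_token, key_to_value) shape that the
# comment above describes; note that the values stay strings.
def parse_config_line_sketch(line):
    fields = line.split()
    first_token = fields[0]
    key_to_value = dict(field.split('=', 1) for field in fields[1:])
    return first_token, key_to_value

assert parse_config_line_sketch('affine-layer name=affine1 dim=1024') == \
       ('affine-layer', {'name': 'affine1', 'dim': '1024'})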
def ConfigLineToObject(config_line, prev_names = None): (first_token, key_to_value) = ParseConfigLine(config_line) + return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names) - if first_token == 'input': - return XconfigInputLayer(first_token, key_to_value) + + +# This function reads an xconfig file and returns it as a list of layers +# (usually we use the variable name 'all_layers' elsewhere for this). +# It will die if the xconfig file is empty or if there was +# some error parsing it. +def ReadXconfigFile(xconfig_filename): + try: + f = open(xconfig_filename, 'r') + except Exception as e: + sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format( + sys.argv[0], xconfig_filename, repr(e))) + prev_names = [] + all_layers = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(config_line) + if x is None: + continue # line was blank or only comments. + (first_token, key_to_value) = x + # the next call will raise an easy-to-understand exception if + # it fails. + this_layer = ParsedLineToXconfigLayer(first_token, + key_to_value, + prev_names) + prev_names.append(this_layer.Name()) + all_layers.append(this_layer) + if len(all_layers) == 0: + raise Exception("{0}: xconfig file '{1}' is empty".format( + sys.argv[0], xconfig_filename)) + f.close() + return all_layers def TestLayers(): diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index 17a25f9fbb4..fbdc011296f 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -407,7 +407,7 @@ def TokenizeDescriptor(descriptor_string, # Note: spaces are allowed in the field names but = signs are # disallowed, which is why it's possible to parse them. # This function also removes comments (anything after '#'). -# As a special case, this function will return NULL if the line +# As a special case, this function will return None if the line # is empty after removing spaces. def ParseConfigLine(orig_config_line): # Remove comments. diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 316a4bb4cb3..a4192685622 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -10,7 +10,7 @@ import copy import imp import ast - +from collections import defaultdict sys.path.insert(0, 'steps/nnet3/libs/') from xconfig_lib import * @@ -356,23 +356,23 @@ def MakeConfigs(config_dir, splice_indexes_string, PrintConfig(key, config_files[key]) -def BackUpXconfigFile(): +def BackUpXconfigFile(xconfig_file, config_dir): # we write a copy of the xconfig file just to have a record of the original # input. try: - xconfig_file_out = open(args.config_dir + "/xconfig") + xconfig_file_out = open(config_dir + "/xconfig") except: sys.exit("{0}: error opening file {1}/xconfig for output".format( - sys.argv[0], args.config_dir)) + sys.argv[0], config_dir)) try: - xconfig_file_in = open(args.xconfig_file) + xconfig_file_in = open(xconfig_file) except: - sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], args.config_dir)) + sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], config_dir)) - print("# This file was copied from {0} by {1}. 
It is the source\n" - "# from which the config files in this directory were generated.\n" - "# Full command line was:\n" - "# {2}".format(args.xconfig_file, sys.argv[0], ' '.join(sys.argv)), + print("# This file was created by the command:\n" + "# {0}\n" + "# It is a copy of the source from which the config files in " + "# this directory were generated.\n".format(' '.join(sys.argv)), file=xconfig_file_out) while True: @@ -384,11 +384,88 @@ def BackUpXconfigFile(): xconfig_file_in.close() +def WriteExpandedXconfigFile(config_dir, all_layers): + try: + xconfig_file_out = open(config_dir + "/xconfig.expanded") + except: + sys.exit("{0}: error opening file {1}/xconfig.expanded for output".format( + sys.argv[0], config_dir)) + + print("# This file was created by {0}. It contains the same content as\n" + "# ./xconfig but it was parsed, default config values were set, and\n" + "# it was printed from the internal representation.\n".format(sys.argv[0]), + file=xconfig_file_out) + + for layer in all_layers: + print(str(layer), file=xconfig_file_out) + xconfig_file_out.close() + + +# This function returns a map from config-file basename +# e.g. 'init', 'ref', 'layer1' to a documentation string that goes +# at the top of the file. +def GetConfigHeaders(): + ans = defaultdict(str) # resulting dict will default to the empty string + # for any config files not explicitly listed here. + ans['init'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the input of the network and is used in\n" + "# accumulating stats for an LDA-like transform of the\n" + "# input features.\n"); + ans['ref'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the entire neural network, but with those\n" + "# components that would normally require fixed vectors/matrices\n" + "# read from disk, replaced with random initialization\n" + "# (this applies to the LDA-like transform and the\n" + "# presoftmax-prior-scale, if applicable). This file\n" + "# is used only to work out the left-context and right-context\n" + "# of the network.\n"); + ans['all'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the entire neural network. It might not be used\n" + "# in the current scripts; it's provided for forward compatibility\n" + "# to possible future changes.\n") + + # Note: currently we just copy all lines that were going to go to 'all', into + # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py + ans['layer1'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the configuration of the entire neural network.\n" + "# The contents are the same\n" + "# as 'all.config'. The reason this file is named this way (and\n" + "# that the config file `num_hidden_layers` contains 1, even though\n" + "# this file may really contain more than 1 hidden layer), is\n" + "# historical... we used to create networks by adding hidden layers\n" + "# one by one (discriminative pretraining), but more recently we\n" + "# have found that it's better to add them all at once. This file\n" + "# exists to enable the older training scripts to work. Note:\n" + "# it contains the inputs of the neural network even though it doesn't\n" + "# have to (since they are included in 'init.config'). This will\n" + "# give us the flexibility to change the scripts in future.\n"); + return ans; + + + + +# This is where most of the work of this program happens. 
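# Editor's hypothetical sketch of how this function might be completed (the
# draft body below is unfinished; the real implementation may differ). It only
# assumes that each layer's GetFullConfig() returns (config_basename, line)
# pairs and that GetConfigHeaders() maps basenames to header strings, as
# described elsewhere in this patch. The function name is made up.
from collections import defaultdict

def WriteConfigFilesSketch(config_dir, all_layers):
    config_basename_to_lines = defaultdict(list)
    for layer in all_layers:
        for basename, line in layer.GetFullConfig():
            config_basename_to_lines[basename].append(line)
    config_basename_to_header = GetConfigHeaders()
    for basename, lines in config_basename_to_lines.items():
        filename = '{0}/{1}.config'.format(config_dir, basename)
        with open(filename, 'w') as f:
            f.write(config_basename_to_header[basename])
            f.write('\n'.join(lines) + '\n')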
+def WriteConfigFiles(config_dir, all_layers): + config_basename_to_lines = defaultdict(list)2 + + config_basename_to_header = GetConfigHeaders() + + + + + def Main(): args = GetArgs() - BackUpXconfigFile() + BackUpXconfigFile(args.xconfig_file, args.config_dir) + + all_layers = ReadXconfigFile(args.xconfig_file) + WriteExpandedXconfigFile(args.config_dir all_layers) try: f = From 0886454975bf8113abadd75c5166f85f9f42d4ed Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 17:40:36 -0400 Subject: [PATCH 06/12] some minor reorganization --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 270 +++++---- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 129 ++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 528 +++++------------- 3 files changed, 383 insertions(+), 544 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index e3d50115b03..c8511600b6c 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -9,90 +9,6 @@ import argparse from xconfig_lib import * -# Given a list of objects of type XconfigLayerBase ('all_layers'), -# including at least the layers preceding 'current_layer' (and maybe -# more layers), return the names of layers preceding 'current_layer' -# This will be used in parsing expressions like [-1] in descriptors -# (which is an alias for the previous layer). -def GetPrevNames(all_layers, current_layer): - assert current_layer in all_layers - prev_names = [] - for layer in all_layers: - if layer is current_layer: - break - prev_names.append(layer.Name()) - return prev_names - -# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like -# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects -# inheriting from XconfigLayerBase. 'current_layer' is provided so that the -# function can make sure not to look in layers that appear *after* this layer -# (because that's not allowed). -def GetDimFromLayerName(all_layers, current_layer, full_layer_name): - assert isinstance(full_layer_name, str) - split_name = full_layer_name.split('.') - if len(split_name) == 0: - raise Exception("Bad layer name: " + full_layer_name) - layer_name = split_name[0] - if len(split_name) == 1: - qualifier = None - else: - # we probably expect len(split_name) == 2 in this case, - # but no harm in allowing dots in the qualifier. - qualifier = '.'.join(split_name[1:]) - - for layer in all_layers: - if layer is current_layer: - break - if layer.Name() == layer_name: - if not qualifier in layer.Qualifiers(): - raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( - layer_name, qualifier)) - return layer.OutputDim(qualifier) - # No such layer was found. - if layer_name in [ layer.Name() for layer in all_layers ]: - raise Exception("Layer '{0}' was requested before it appeared in " - "the xconfig file (circular dependencies or out-of-order " - "layers".format(layer_name)) - else: - raise Exception("No such layer: '{0}'".format(layer_name)) - - -# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like -# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple -# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects -# inheriting from XconfigLayerBase. 'current_layer' is provided so that the -# function can make sure not to look in layers that appear *after* this layer -# (because that's not allowed). 
-def GetStringFromLayerName(all_layers, current_layer, full_layer_name): - assert isinstance(full_layer_name, str) - split_name = full_layer_name.split('.') - if len(split_name) == 0: - raise Exception("Bad layer name: " + full_layer_name) - layer_name = split_name[0] - if len(split_name) == 1: - qualifier = None - else: - # we probably expect len(split_name) == 2 in this case, - # but no harm in allowing dots in the qualifier. - qualifier = '.'.join(split_name[1:]) - - for layer in all_layers: - if layer is current_layer: - break - if layer.Name() == layer_name: - if not qualifier in layer.Qualifiers(): - raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( - layer_name, qualifier)) - return layer.OutputName(qualifier) - # No such layer was found. - if layer_name in [ layer.Name() for layer in all_layers ]: - raise Exception("Layer '{0}' was requested before it appeared in " - "the xconfig file (circular dependencies or out-of-order " - "layers".format(layer_name)) - else: - raise Exception("No such layer: '{0}'".format(layer_name)) - # A base-class for classes representing layers of xconfig files. @@ -105,40 +21,73 @@ class XconfigLayerBase(object): # The only required and 'special' values that are dealt with directly at this level, are # 'name' and 'input'. # The rest are put in self.config and are dealt with by the child classes' init functions. - # prev_names is an array of the names (xxx in 'name=xxx') of previous - # lines of the config file. + # all_layers is an array of objects inheriting XconfigLayerBase for all previously + # parsed layers. - def __init__(self, first_token, key_to_value, prev_names = None): + def __init__(self, first_token, key_to_value, all_layers): self.layer_type = first_token if not 'name' in key_to_value: - raise Exception("Expected 'name' to be specified.") + raise RuntimeError("Expected 'name' to be specified.") self.name = key_to_value['name'] if not IsValidLineName(self.name): - raise Exception("Invalid value: name={0}".format(key_to_value['name'])) + raise RuntimeError("Invalid value: name={0}".format(key_to_value['name'])) # the following, which should be overridden in the child class, sets # default config parameters in self.config. self.SetDefaultConfigs() # The following is not to be reimplemented in child classes; - # sets the config files to those specified by the user. - self._SetConfigs(key_to_value) + # it sets the config values to those specified by the user, and + # parses any Descriptors. + self.SetConfigs(key_to_value, all_layers) # the following, which should be overridden in the child class, checks # that the config parameters that have been set are reasonable. self.CheckConfigs() # We broke this code out of __init__ for clarity. - def _SetConfigs(self, key_to_value): + def SetConfigs(self, key_to_value, all_layers): # the child-class constructor will deal with the configuration values # in a more specific way. for key,value in key_to_value.items(): - if key != 'name' and key != 'input': + if key != 'name': if not key in self.config: - raise Exception("Configuration value {0}={1} was not expected in " + raise RuntimeError("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) self.config[key] = ConvertValueToType(key, type(self.config[key]), value) + self.descriptors = dict() + self.descriptor_dims = dict() + # Parse Descriptors and get their dims and their 'final' string form. 
+ # Put them as 4-tuples (descriptor, string, normalized-string, final-string) + # in self.descriptors[key] + for key in self.GetDescriptorConfigs(): + if not key in self.config: + raise RuntimeError("{0}: object of type {1} needs to override " + "GetDescriptorConfigs()".format(sys.argv[0], + str(type(self)))) + descriptor_string = self.config[key] # input string. + assert isinstance(descriptor_string, str) + desc = self.ConvertToDescriptor(descriptor_string, all_layers) + desc_dim = self.GetDimForDescriptor(desc, all_layers) + desc_norm_str = desc.str() + # desc_output_str contains the "final" component names, those that + # appear in the actual config file (i.e. not names like + # 'layer.qualifier'); that's how it differs from desc_norm_str. + # Note: it's possible that the two strings might be the same in + # many, even most, cases-- it depends whether OutputName(self, qualifier) + # returns self.Name() + '.' + qualifier when qualifier is not None. + # That's up to the designer of the layer type. + desc_output_str = self.GetStringForDescriptor(desc, all_layers) + self.descriptors[key] = (desc, desc_dim, desc_norm_str, desc_output_str) + # the following helps to check the code by parsing it again. + desc2 = self.ConvertToDescriptor(desc_norm_str, all_layers) + desc_norm_str2 = desc2.str() + # if the following ever fails we'll have to do some debugging. + if desc_norm_str != desc_norm_str2: + raise RuntimeError("Likely code error: '{0}' != '{1}'".format( + desc_norm_str, desc_norm_str2)) + # This function converts 'this' to a string which could be printed to an # xconfig file; in xconfig_to_configs.py we actually expand all the lines to # strings and write it as xconfig.expanded as a reference (so users can @@ -152,6 +101,17 @@ def str(self): def __str__(self): return self.str() + + # This function converts any config variables in self.config which + # correspond to Descriptors, into a 'normalized form' derived from parsing + # them as Descriptors, replacing things like [-1] with the actual layer + # names, and regenerating them as strings. We stored this when the + # object was initialized, in self.descriptors; this function just copies them + # back to the config. + def NormalizeDescriptors(self): + for key,tuple in self.descriptors.items(): + self.config[key] = tuple[2] # desc_norm_str + # This function, which is a convenience function intended to be called from # child classes, converts a string representing a descriptor # ('descriptor_string') into an object of type Descriptor, and returns it. @@ -162,15 +122,16 @@ def ConvertToDescriptor(self, descriptor_string, all_layers): prev_names = GetPrevNames(all_layers, self) tokens = TokenizeDescriptor(descriptor_string, prev_names) pos = 0 - (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + (descriptor, pos) = ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: - raise Exception("Parsing Descriptor, saw junk at end: " + + raise RuntimeError("Parsing Descriptor, saw junk at end: " + ' '.join(tokens[pos:-1])) + return descriptor # Returns the dimension of a Descriptor object. - # This is a convenience function provided for use in child classes; + # This is a convenience function used in SetConfigs. 
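    # Editor's illustration (hypothetical dims) of the Descriptor dimension
    # rules that Descriptor.Dim() in xconfig_lib.py implements, and that the
    # helper below relies on: with layer_to_dim('input') == 40 and
    # layer_to_dim('ivector') == 100,
    #   Offset(input, -1)                       -> 40   (dim of the 1st arg)
    #   Sum(input, input)                       -> 40   (all args must agree)
    #   Append(Offset(input, -1), input,
    #          ReplaceIndex(ivector, t, 0))     -> 40 + 40 + 100 = 180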
def GetDimForDescriptor(self, descriptor, all_layers): layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) return descriptor.Dim(layer_to_dim_func) @@ -184,24 +145,37 @@ def GetStringForDescriptor(self, descriptor, all_layers): # Name() returns the name of this layer, e.g. 'affine1'. It does not # necessarily correspond to a component name. - def Name(): + def Name(self): return self.name - ###### Functions that should be overridden by the child class: ##### + ###### Functions that might be overridden by the child class: ##### # child classes should override this. - def SetDefaultConfigs(): - raise Exception("Child classes must override SetDefaultConfigs().") + def SetDefaultConfigs(self): + raise RuntimeError("Child classes must override SetDefaultConfigs().") # child classes should override this. - def CheckConfigs(): + def CheckConfigs(self): pass + # This function, which may be (but usually will not have to be) overrideden + # by child classes, returns a list of keys/names of config variables that + # will be interpreted as Descriptors. It is used in the function + # 'NormalizeDescriptors()'. This implementation will work + # layer types whose only Descriptor-valued config is 'input'. + + # If a child class adds more config variables that are interpreted as + # descriptors (e.g. to read auxiliary inputs), or does not have an input + # (e.g. the XconfigInputLayer), it should override this function's + # implementation to something like: `return ['input', 'input2']` + def GetDescriptorConfigs(self): + return [ 'input' ] + # Returns a list of all qualifiers (meaning auxiliary outputs) that this # layer supports. These are either 'None' for the regular output, or a # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that # the layer might provide. Most layer types will not need to override this. - def Qualifiers(): + def Qualifiers(self): return [ None ] # Called with qualifier == None, this returns the component-node name of the @@ -213,13 +187,13 @@ def Qualifiers(): # a highway LSTM you need the memory-cell of a layer, so you might allow # qualifier='memory_cell' for such a layer type, and it would return the # component node or a suitable Descriptor: something like 'lstm3.c_t' - def OutputName(qualifier = None): - raise Exception("Child classes must override OutputName()") + def OutputName(self, qualifier = None): + raise RuntimeError("Child classes must override OutputName()") # The dimension that this layer outputs. The 'qualifier' parameter is for # layer types which support auxiliary outputs. - def OutputDim(qualifier = None): - raise Exception("Child classes must override OutputDim()") + def OutputDim(self, qualifier = None): + raise RuntimeError("Child classes must override OutputDim()") # This function returns lines destined for the 'full' config format, as # would be read by the C++ programs. @@ -229,11 +203,8 @@ def OutputDim(qualifier = None): # [ ('init', 'input-node name=input dim=40'), # ('ref', 'input-node name=input dim=40') ] # which would be written to config_dir/init.config and config_dir/ref.config. - # - # 'all_layers' is a vector of objects inheriting from XconfigLayerBase, - # which is used to get the component names and dimensions at the input. 
- def GetFullConfig(self, all_layers): - raise Exception("Child classes must override GetFullConfig()") + def GetFullConfig(self): + raise RuntimeError("Child classes must override GetFullConfig()") # This class is for lines like @@ -252,17 +223,20 @@ def SetDefaultConfigs(self): def CheckConfigs(self): if self.config['dim'] <= 0: - raise Exception("Dimension of input-layer '{0}' is not set".format(self.name)) + raise RuntimeError("Dimension of input-layer '{0}' is not set".format(self.name)) + + def GetDescriptorConfigs(self): + return [] # there is no 'input' field in self.config. - def OutputName(qualifier = None): + def OutputName(self, qualifier = None): assert qualifier is None return self.name - def OutputDim(qualifier = None): + def OutputDim(self, qualifier = None): assert qualifier is None return self.config['dim'] - def GetFullConfig(self, all_layers): + def GetFullConfig(self): # the input layers need to be printed in 'init.config' (which # initializes the neural network prior to the LDA), in 'ref.config', # which is a version of the config file used for getting left and right @@ -271,13 +245,63 @@ def GetFullConfig(self, all_layers): # In 'full.config' we write everything, this is just for reference, # and also for cases where we don't use the LDA-like transform. ans = [] - for config_name in [ 'init', 'ref', 'full' ]: + for config_name in [ 'init', 'ref', 'all' ]: ans.append( (config_name, 'input-node name={0} dim={1}'.format(self.name, self.config['dim']))) return ans + +# This class is for lines like +# 'output name=output input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))' +# This is for outputs that are not really output "layers" (there is no affine transform or +# nonlinearity), they just directly map to an output-node in nnet3. +class XconfigTrivialOutputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'output' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]' } + + def CheckConfigs(self): + pass # nothing to check; descriptor-parsing can't happen in this function. + + def OutputName(self, qualifier = None): + assert qualifier is None + return self.name + + def OutputDim(self, qualifier = None): + assert qualifier is None + # note: each value of self.descriptors is (descriptor, dim, normalized-string, output-string). + return self.descriptors['input'][1] + + def GetFullConfig(self): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'output-string' we mean a string that can appear in + # config-files, i.e. 
it contains the 'final' names of + descriptor_output_str = self.descriptors['input'][3] + + for config_name in [ 'ref', 'all' ]: + ans.append( (config_name, + 'output-node name={0} input={1}'.format( + self.name, descriptor_output_str))) + return ans + + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. @@ -287,8 +311,10 @@ def GetFullConfig(self, all_layers): def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): if first_token == 'input': return XconfigInputLayer(first_token, key_to_value, prev_names) + elif first_token == 'output': + return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) else: - raise Exception("Error parsing xconfig line (no such layer type): " + + raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) @@ -314,13 +340,12 @@ def ReadXconfigFile(xconfig_filename): except Exception as e: sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format( sys.argv[0], xconfig_filename, repr(e))) - prev_names = [] all_layers = [] while True: line = f.readline() if line == '': break - x = ParseConfigLine(config_line) + x = ParseConfigLine(line) if x is None: continue # line was blank or only comments. (first_token, key_to_value) = x @@ -328,11 +353,10 @@ def ReadXconfigFile(xconfig_filename): # it fails. this_layer = ParsedLineToXconfigLayer(first_token, key_to_value, - prev_names) - prev_names.append(this_layer.Name()) + all_layers) all_layers.append(this_layer) if len(all_layers) == 0: - raise Exception("{0}: xconfig file '{1}' is empty".format( + raise RuntimeError("{0}: xconfig file '{1}' is empty".format( sys.argv[0], xconfig_filename)) f.close() return all_layers diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index fbdc011296f..782e6ebd3e1 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -1,3 +1,10 @@ +# Copyright 2016 Johns Hopkins University (Author: Daniel Povey). +# License: Apache 2.0. + +# This library contains various utilities that are involved in processing +# of xconfig -> config conversion. It contains "generic" lower-level code +# while xconfig_layers.py contains the code specific to layer types. + from __future__ import print_function import subprocess import logging @@ -8,28 +15,92 @@ import time import argparse +# [utility function used in xconfig_layers.py] +# Given a list of objects of type XconfigLayerBase ('all_layers'), +# including at least the layers preceding 'current_layer' (and maybe +# more layers), return the names of layers preceding 'current_layer' +# This will be used in parsing expressions like [-1] in descriptors +# (which is an alias for the previous layer). +def GetPrevNames(all_layers, current_layer): + prev_names = [] + for layer in all_layers: + if layer is current_layer: + break + prev_names.append(layer.Name()) + return prev_names + +# [utility function used in xconfig_layers.py] +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). 
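# Editor's standalone illustration of the layer-name / qualifier split
# convention implemented just below ('lstm2.memory_cell' is a made-up name):
full_layer_name = 'lstm2.memory_cell'
split_name = full_layer_name.split('.')
layer_name = split_name[0]
qualifier = '.'.join(split_name[1:]) if len(split_name) > 1 else None
assert (layer_name, qualifier) == ('lstm2', 'memory_cell')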
+def GetDimFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise RuntimeError("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputDim(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise RuntimeError("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise RuntimeError("No such layer: '{0}'".format(layer_name)) + + +# [utility function used in xconfig_layers.py] +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple +# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetStringFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise RuntimeError("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputName(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise RuntimeError("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise RuntimeError("No such layer: '{0}'".format(layer_name)) -class StrToBoolAction(argparse.Action): - """ A custom action to convert bools from shell format i.e., true/false - to python format i.e., True/False """ - def __call__(self, parser, namespace, values, option_string=None): - if values == "true": - setattr(namespace, self.dest, True) - elif values == "false": - setattr(namespace, self.dest, False) - else: - raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) - -class NullstrToNoneAction(argparse.Action): - """ A custom action to convert empty strings passed by shell - to None in python. This is necessary as shell scripts print null strings - when a variable is not specified. We could use the more apt None - in python. 
""" - def __call__(self, parser, namespace, values, option_string=None): - if values.strip() == "": - setattr(namespace, self.dest, None) - else: - setattr(namespace, self.dest, values) # This function, used in converting string values in config lines to # configuration values in self.config in layers, attempts to @@ -57,7 +128,7 @@ def ConvertValueToType(key, dest_type, string_value): raise Exception("Invalid configuration value {0}={1} (expected int)".format( key, string_value)) elif dest_type == type(str()): - return sting_value + return string_value @@ -123,10 +194,12 @@ def __init__(self, def ConfigString(self, layer_to_string): if self.operator is None: assert len(self.items) == 1 and isinstance(self.items[0], str) - return layer_to_node(self.items[0]) + return layer_to_string(self.items[0]) else: assert isinstance(self.operator, str) - return self.operator + '(' + ', '.join([OutputString(item, layer_to_node) for item in self.items]) + ')' + return self.operator + '(' + ', '.join( + [ item.ConfigString(layer_to_string) if isinstance(item, Descriptor) else str(item) + for item in self.items]) + ')' def str(self): if self.operator is None: @@ -154,18 +227,18 @@ def Dim(self, layer_to_dim): elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]: # these are all operators for which all args are descriptors # and must have the same dim. - dim = self.items[0].Dim() + dim = self.items[0].Dim(layer_to_dim) for desc in self.items[1:]: - next_dim = desc.Dim() + next_dim = desc.Dim(layer_to_dim) if next_dim != dim: raise Exception("In descriptor {0}, different fields have different " "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) return dim elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]: # for these operators, only the 1st arg is relevant. - return self.items[0].Dim() + return self.items[0].Dim(layer_to_dim) elif self.operator == 'Append': - return sum([ x.Dim() for x in self.items]) + return sum([ x.Dim(layer_to_dim) for x in self.items]) else: raise Exception("Unknown operator {0}".format(self.operator)) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index a4192685622..2684e062e8e 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -13,23 +13,26 @@ from collections import defaultdict sys.path.insert(0, 'steps/nnet3/libs/') +# the following is in case we weren't running this from the normal directory. 
+sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/') + from xconfig_lib import * from xconfig_layers import * def GetArgs(): # we add compulsary arguments as named arguments for readability - parser = argparse.ArgumentParser(description="Reads an xconfig file and creates config files " - "for neural net creation and training", - epilog="Search egs/*/*/local/nnet3/*sh for examples") - - parser.add_argument("--self-repair-scale-nonlinearity", type=float, - help="A non-zero value activates the self-repair mechanism in " - "nonlinearities (larger -> faster self-repair)", default=1.0e-05) - parser.add_argument("xconfig_file", - help="Filename of input xconfig file") - parser.add_argument("config_dir", - help="Directory to write config files and variables") + parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files ' + 'for neural net creation and training', + epilog='Search egs/*/*/local/nnet3/*sh for examples') + + parser.add_argument('--self-repair-scale-nonlinearity', type=float, + help='A non-zero value activates the self-repair mechanism in ' + 'nonlinearities (larger -> faster self-repair)', default=1.0e-05) + parser.add_argument('xconfig_file', + help='Filename of input xconfig file') + parser.add_argument('config_dir', + help='Directory to write config files and variables') print(' '.join(sys.argv)) @@ -42,332 +45,37 @@ def CheckArgs(args): if not os.path.exists(args.config_dir): os.makedirs(args.config_dir) if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: - sys.exit("{0}: invalid option --self-repair-scale-nonlinearity={1}".format( + sys.exit('{0}: invalid option --self-repair-scale-nonlinearity={1}'.format( sys.argv[0], args.self_repair_scale_nonlinearity)) return args -def AddConvMaxpLayer(config_lines, name, input, args): - if '3d-dim' not in input: - raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.") - - input = nodes.AddConvolutionLayer(config_lines, name, input, - input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], - args.filt_x_dim, args.filt_y_dim, - args.filt_x_step, args.filt_y_step, - args.num_filters, input['vectorization']) - - if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: - input = nodes.AddMaxpoolingLayer(config_lines, name, input, - input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], - args.pool_x_size, args.pool_y_size, args.pool_z_size, - args.pool_x_step, args.pool_y_step, args.pool_z_step) - - return input - -# The ivectors are processed through an affine layer parallel to the CNN layers, -# then concatenated with the CNN output and passed to the deeper part of the network. 
-def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): - cnn_args = ParseCnnString(cnn_layer) - num_cnn_layers = len(cnn_args) - # We use an Idct layer here to convert MFCC to FBANK features - nnet3_train_lib.WriteIdctMatrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") - prev_layer_output = {'descriptor': "input", - 'dimension': feat_dim} - prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') - - list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] - splice_descriptor = "Append({0})".format(", ".join(list)) - cnn_input_dim = len(splice_indexes) * feat_dim - prev_layer_output = {'descriptor': splice_descriptor, - 'dimension': cnn_input_dim, - '3d-dim': [len(splice_indexes), feat_dim, 1], - 'vectorization': 'yzx'} - - for cl in range(0, num_cnn_layers): - prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) - - if cnn_bottleneck_dim > 0: - prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") - - if ivector_dim > 0: - iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', - 'dimension': ivector_dim} - iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") - prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) - prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] - - return prev_layer_output - -def PrintConfig(file_name, config_lines): - f = open(file_name, 'w') - f.write("\n".join(config_lines['components'])+"\n") - f.write("\n#Component nodes\n") - f.write("\n".join(config_lines['component-nodes'])+"\n") - f.close() - -def ParseCnnString(cnn_param_string_list): - cnn_parser = argparse.ArgumentParser(description="cnn argument parser") - - cnn_parser.add_argument("--filt-x-dim", required=True, type=int) - cnn_parser.add_argument("--filt-y-dim", required=True, type=int) - cnn_parser.add_argument("--filt-x-step", type=int, default = 1) - cnn_parser.add_argument("--filt-y-step", type=int, default = 1) - cnn_parser.add_argument("--num-filters", required=True, type=int) - cnn_parser.add_argument("--pool-x-size", type=int, default = 1) - cnn_parser.add_argument("--pool-y-size", type=int, default = 1) - cnn_parser.add_argument("--pool-z-size", type=int, default = 1) - cnn_parser.add_argument("--pool-x-step", type=int, default = 1) - cnn_parser.add_argument("--pool-y-step", type=int, default = 1) - cnn_parser.add_argument("--pool-z-step", type=int, default = 1) - - cnn_args = [] - for cl in range(0, len(cnn_param_string_list)): - cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl]))) - - return cnn_args - -def ParseSpliceString(splice_indexes): - splice_array = [] - left_context = 0 - right_context = 0 - split1 = splice_indexes.split(); # we already checked the string is nonempty. 
- if len(split1) < 1: - raise Exception("invalid splice-indexes argument, too short: " - + splice_indexes) - try: - for string in split1: - split2 = string.split(",") - if len(split2) < 1: - raise Exception("invalid splice-indexes argument, too-short element: " - + splice_indexes) - int_list = [] - for int_str in split2: - int_list.append(int(int_str)) - if not int_list == sorted(int_list): - raise Exception("elements of splice-indexes must be sorted: " - + splice_indexes) - left_context += -int_list[0] - right_context += int_list[-1] - splice_array.append(int_list) - except ValueError as e: - raise Exception("invalid splice-indexes argument " + splice_indexes + str(e)) - left_context = max(0, left_context) - right_context = max(0, right_context) - - return {'left_context':left_context, - 'right_context':right_context, - 'splice_indexes':splice_array, - 'num_hidden_layers':len(splice_array) - } - -# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. -def MakeConfigs(config_dir, splice_indexes_string, - cnn_layer, cnn_bottleneck_dim, cepstral_lifter, - feat_dim, ivector_dim, num_targets, add_lda, - nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, - nonlin_output_dim_init, nonlin_output_dim_final, - use_presoftmax_prior_scale, - final_layer_normalize_target, - include_log_softmax, - add_final_sigmoid, - xent_regularize, - xent_separate_forward_affine, - self_repair_scale, - objective_type): - - parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) - - left_context = parsed_splice_output['left_context'] - right_context = parsed_splice_output['right_context'] - num_hidden_layers = parsed_splice_output['num_hidden_layers'] - splice_indexes = parsed_splice_output['splice_indexes'] - input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim - - if xent_separate_forward_affine: - if splice_indexes[-1] != [0]: - raise Exception("--xent-separate-forward-affine option is supported only if the last-hidden layer has no splicing before it. Please use a splice-indexes with just 0 as the final splicing config.") - - prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) - - config_lines = {'components':[], 'component-nodes':[]} - - config_files={} - prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], ivector_dim) - - # Add the init config lines for estimating the preconditioning matrices - init_config_lines = copy.deepcopy(config_lines) - init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') - init_config_lines['components'].insert(0, '# preconditioning matrix computation') - nodes.AddOutputLayer(init_config_lines, prev_layer_output) - config_files[config_dir + '/init.config'] = init_config_lines - - if cnn_layer is not None: - prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, - feat_dim, splice_indexes[0], ivector_dim) - - if add_lda: - prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') - - left_context = 0 - right_context = 0 - # we moved the first splice layer to before the LDA.. 
- # so the input to the first affine layer is going to [0] index - splice_indexes[0] = [0] - - if not nonlin_output_dim is None: - nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers - elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: - raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") - else: - # computes relu-dim for each hidden layer. They increase geometrically across layers - factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 - nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] - assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error - nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified - - for i in range(0, num_hidden_layers): - # make the intermediate config file for layerwise discriminative training - - # prepare the spliced input - if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): - try: - zero_index = splice_indexes[i].index(0) - except ValueError: - zero_index = None - # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor - prev_layer_output_descriptor = prev_layer_output['descriptor'] - subset_output = prev_layer_output - if subset_dim > 0: - # if subset_dim is specified the script expects a zero in the splice indexes - assert(zero_index is not None) - subset_node_config = "dim-range-node name=Tdnn_input_{0} input-node={1} dim-offset={2} dim={3}".format(i, prev_layer_output_descriptor, 0, subset_dim) - subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), - 'dimension' : subset_dim} - config_lines['component-nodes'].append(subset_node_config) - appended_descriptors = [] - appended_dimension = 0 - for j in range(len(splice_indexes[i])): - if j == zero_index: - appended_descriptors.append(prev_layer_output['descriptor']) - appended_dimension += prev_layer_output['dimension'] - continue - appended_descriptors.append('Offset({0}, {1})'.format(subset_output['descriptor'], splice_indexes[i][j])) - appended_dimension += subset_output['dimension'] - prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), - 'dimension' : appended_dimension} - else: - # this is a normal affine node - pass - - if xent_separate_forward_affine and i == num_hidden_layers - 1: - if xent_regularize == 0.0: - raise Exception("xent-separate-forward-affine=True is valid only if xent-regularize is non-zero") - - if nonlin_type == "relu" : - prev_layer_output_chain = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_chain", - prev_layer_output, nonlin_output_dim, - self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) - - prev_layer_output_xent = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_xent", - prev_layer_output, nonlin_output_dim, - self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) - elif nonlin_type == "pnorm" : - prev_layer_output_chain = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_chain", - prev_layer_output, nonlin_input_dim, nonlin_output_dim, - norm_target_rms = final_layer_normalize_target) - - prev_layer_output_xent = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_xent", - prev_layer_output, 
nonlin_input_dim, nonlin_output_dim, - norm_target_rms = final_layer_normalize_target) - else: - raise Exception("Unknown nonlinearity type") - - nodes.AddFinalLayer(config_lines, prev_layer_output_chain, num_targets, - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = include_log_softmax) - - nodes.AddFinalLayer(config_lines, prev_layer_output_xent, num_targets, - ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( - 0.5 / xent_regularize), - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = True, - name_affix = 'xent') - else: - if nonlin_type == "relu": - prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), - prev_layer_output, nonlin_output_dims[i], - self_repair_scale = self_repair_scale, - norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) - elif nonlin_type == "pnorm": - prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), - prev_layer_output, nonlin_input_dim, nonlin_output_dim, - norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) - else: - raise Exception("Unknown nonlinearity type") - # a final layer is added after each new layer as we are generating - # configs for layer-wise discriminative training - - # add_final_sigmoid adds a sigmoid as a final layer as alternative - # to log-softmax layer. - # http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression#Softmax_Regression_vs._k_Binary_Classifiers - # This is useful when you need the final outputs to be probabilities between 0 and 1. - # Usually used with an objective-type such as "quadratic". - # Applications are k-binary classification such Ideal Ratio Mask prediction. 
- nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = include_log_softmax, - add_final_sigmoid = add_final_sigmoid, - objective_type = objective_type) - if xent_regularize != 0.0: - nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, - ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( - 0.5 / xent_regularize), - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = True, - name_affix = 'xent') - - config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines - config_lines = {'components':[], 'component-nodes':[]} - - left_context += int(parsed_splice_output['left_context']) - right_context += int(parsed_splice_output['right_context']) - - # write the files used by other scripts like steps/nnet3/get_egs.sh - f = open(config_dir + "/vars", "w") - print('model_left_context=' + str(left_context), file=f) - print('model_right_context=' + str(right_context), file=f) - print('num_hidden_layers=' + str(num_hidden_layers), file=f) - print('num_targets=' + str(num_targets), file=f) - print('add_lda=' + ('true' if add_lda else 'false'), file=f) - print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) - print('objective_type=' + objective_type, file=f) - f.close() - - # printing out the configs - # init.config used to train lda-mllt train - for key in config_files.keys(): - PrintConfig(key, config_files[key]) + +# # write the files used by other scripts like steps/nnet3/get_egs.sh +# f = open(config_dir + 'vars', 'w') +# print('model_left_context=' + str(left_context), file=f) +# print('model_right_context=' + str(right_context), file=f) +# print('num_hidden_layers=' + str(num_hidden_layers), file=f) +# print('num_targets=' + str(num_targets), file=f) +# print('add_lda=' + ('true' if add_lda else 'false'), file=f) +# print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) +# print('objective_type=' + objective_type, file=f) +# f.close() + def BackUpXconfigFile(xconfig_file, config_dir): # we write a copy of the xconfig file just to have a record of the original # input. try: - xconfig_file_out = open(config_dir + "/xconfig") + xconfig_file_out = open(config_dir + '/xconfig', 'w') except: - sys.exit("{0}: error opening file {1}/xconfig for output".format( + sys.exit('{0}: error opening file {1}/xconfig for output'.format( sys.argv[0], config_dir)) try: xconfig_file_in = open(xconfig_file) except: - sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], config_dir)) + sys.exit('{0}: error opening file {1} for input'.format(sys.argv[0], config_dir)) print("# This file was created by the command:\n" "# {0}\n" @@ -384,65 +92,90 @@ def BackUpXconfigFile(xconfig_file, config_dir): xconfig_file_in.close() -def WriteExpandedXconfigFile(config_dir, all_layers): +# This functions writes config_dir/xconfig.expanded.1 and +# config_dir/xconfig.expanded.2, showing some of the internal stages of +# processing the xconfig file before turning it into config files. 
+def WriteExpandedXconfigFiles(config_dir, all_layers): try: - xconfig_file_out = open(config_dir + "/xconfig.expanded") + xconfig_file_out = open(config_dir + '/xconfig.expanded.1', 'w') except: - sys.exit("{0}: error opening file {1}/xconfig.expanded for output".format( + sys.exit('{0}: error opening file {1}/xconfig.expanded.1 for output'.format( sys.argv[0], config_dir)) - print("# This file was created by {0}. It contains the same content as\n" - "# ./xconfig but it was parsed, default config values were set, and\n" - "# it was printed from the internal representation.\n".format(sys.argv[0]), + + print('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '#It contains the same content as ./xconfig but it was parsed and\n' + '#default config values were set.\n' + '# See also ./xconfig.expanded.2\n', file=xconfig_file_out) + + for layer in all_layers: + print(str(layer), file=xconfig_file_out) + xconfig_file_out.close() + + try: + xconfig_file_out = open(config_dir + '/xconfig.expanded.2', 'w') + except: + sys.exit('{0}: error opening file {1}/xconfig.expanded.2 for output'.format( + sys.argv[0], config_dir)) + + print('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the same content as ./xconfig but it was parsed,\n' + '# default config values were set, and Descriptors (input=xxx) were normalized.\n' + '# See also ./xconfig.expanded.1\n\n', file=xconfig_file_out) for layer in all_layers: + layer.NormalizeDescriptors() print(str(layer), file=xconfig_file_out) xconfig_file_out.close() + + # This function returns a map from config-file basename # e.g. 'init', 'ref', 'layer1' to a documentation string that goes # at the top of the file. def GetConfigHeaders(): ans = defaultdict(str) # resulting dict will default to the empty string # for any config files not explicitly listed here. - ans['init'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the input of the network and is used in\n" - "# accumulating stats for an LDA-like transform of the\n" - "# input features.\n"); - ans['ref'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the entire neural network, but with those\n" - "# components that would normally require fixed vectors/matrices\n" - "# read from disk, replaced with random initialization\n" - "# (this applies to the LDA-like transform and the\n" - "# presoftmax-prior-scale, if applicable). This file\n" - "# is used only to work out the left-context and right-context\n" - "# of the network.\n"); - ans['all'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the entire neural network. It might not be used\n" - "# in the current scripts; it's provided for forward compatibility\n" - "# to possible future changes.\n") + ans['init'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the input of the network and is used in\n' + '# accumulating stats for an LDA-like transform of the\n' + '# input features.\n'); + ans['ref'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the entire neural network, but with those\n' + '# components that would normally require fixed vectors/matrices\n' + '# read from disk, replaced with random initialization\n' + '# (this applies to the LDA-like transform and the\n' + '# presoftmax-prior-scale, if applicable). 
This file\n' + '# is used only to work out the left-context and right-context\n' + '# of the network.\n'); + ans['all'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the entire neural network. It might not be used\n' + '# in the current scripts; it\'s provided for forward compatibility\n' + '# to possible future changes.\n') # Note: currently we just copy all lines that were going to go to 'all', into # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py - ans['layer1'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the configuration of the entire neural network.\n" - "# The contents are the same\n" - "# as 'all.config'. The reason this file is named this way (and\n" - "# that the config file `num_hidden_layers` contains 1, even though\n" - "# this file may really contain more than 1 hidden layer), is\n" - "# historical... we used to create networks by adding hidden layers\n" - "# one by one (discriminative pretraining), but more recently we\n" - "# have found that it's better to add them all at once. This file\n" - "# exists to enable the older training scripts to work. Note:\n" - "# it contains the inputs of the neural network even though it doesn't\n" - "# have to (since they are included in 'init.config'). This will\n" - "# give us the flexibility to change the scripts in future.\n"); + ans['layer1'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the configuration of the entire neural network.\n' + '# The contents are the same\n' + '# as \'all.config\'. The reason this file is named this way (and\n' + '# that the config file `num_hidden_layers` contains 1, even though\n' + '# this file may really contain more than 1 hidden layer), is\n' + '# historical... we used to create networks by adding hidden layers\n' + '# one by one (discriminative pretraining), but more recently we\n' + '# have found that it\'s better to add them all at once. This file\n' + '# exists to enable the older training scripts to work. Note:\n' + '# it contains the inputs of the neural network even though it doesn\'t\n' + '# have to (since they are included in \'init.config\'). This will\n' + '# give us the flexibility to change the scripts in future.\n'); return ans; @@ -450,49 +183,58 @@ def GetConfigHeaders(): # This is where most of the work of this program happens. def WriteConfigFiles(config_dir, all_layers): - config_basename_to_lines = defaultdict(list)2 + # config_basename_to_lines is map from the basename of the + # config, as a string (i.e. 'ref', 'all', 'init') to a list of + # strings representing lines to put in the config file. + config_basename_to_lines = defaultdict(list) config_basename_to_header = GetConfigHeaders() + for layer in all_layers: + try: + pairs = layer.GetFullConfig() + for config_basename, line in pairs: + config_basename_to_lines[config_basename].append(line) + except Exception as e: + sys.exit('{0}: error producing config lines from xconfig ' + 'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer), + repr(e))) + + # currently we don't expect any of the GetFullConfig functions to output to + # config-basename 'layer1'... currently we just copy this from + # config-basename 'all', for back-compatibility to older scripts. 
+ assert not 'layer1' in config_basename_to_lines + config_basename_to_lines['layer1'] = config_basename_to_lines['all'] + + for basename,lines in config_basename_to_lines.items(): + header = config_basename_to_header[basename] + filename = '{0}/{1}.config'.format(config_dir, basename) + try: + f = open(filename, 'w') + print(header, file=f) + for line in lines: + print(line, file=f) + f.close() + except Exception as e: + sys.exit('{0}: error writing to config file {1}: error is {2}'.format( + sys.argv[0], filename, repr(e))) + def Main(): args = GetArgs() - BackUpXconfigFile(args.xconfig_file, args.config_dir) - all_layers = ReadXconfigFile(args.xconfig_file) + WriteExpandedXconfigFiles(args.config_dir, all_layers) + WriteConfigFiles(args.config_dir, all_layers) - WriteExpandedXconfigFile(args.config_dir all_layers) - try: - f = - shutil.copyfile(args.xconfig_file, args.xconfig_dir - - MakeConfigs(config_dir = args.config_dir, - splice_indexes_string = args.splice_indexes, - feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, - num_targets = args.num_targets, - add_lda = args.add_lda, - cnn_layer = args.cnn_layer, - cnn_bottleneck_dim = args.cnn_bottleneck_dim, - cepstral_lifter = args.cepstral_lifter, - nonlin_type = args.nonlin_type, - nonlin_input_dim = args.nonlin_input_dim, - nonlin_output_dim = args.nonlin_output_dim, - subset_dim = args.subset_dim, - nonlin_output_dim_init = args.nonlin_output_dim_init, - nonlin_output_dim_final = args.nonlin_output_dim_final, - use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, - final_layer_normalize_target = args.final_layer_normalize_target, - include_log_softmax = args.include_log_softmax, - add_final_sigmoid = args.add_final_sigmoid, - xent_regularize = args.xent_regularize, - xent_separate_forward_affine = args.xent_separate_forward_affine, - self_repair_scale = args.self_repair_scale_nonlinearity, - objective_type = args.objective_type) - -if __name__ == "__main__": + +if __name__ == '__main__': Main() + + +# test: +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo From a2d120a5c3103a07640cc5071b1a5e9192f6deba Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 17:53:55 -0400 Subject: [PATCH 07/12] Import modules only --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 21 +++++++++---------- .../libs/{xconfig_lib.py => xconfig_utils.py} | 0 egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 6 +++--- 3 files changed, 13 insertions(+), 14 deletions(-) rename egs/wsj/s5/steps/nnet3/libs/{xconfig_lib.py => xconfig_utils.py} (100%) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index c8511600b6c..7af70d96ae8 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -7,8 +7,7 @@ import traceback import time import argparse -from xconfig_lib import * - +import xconfig_utils # A base-class for classes representing layers of xconfig files. 
@@ -29,7 +28,7 @@ def __init__(self, first_token, key_to_value, all_layers): if not 'name' in key_to_value: raise RuntimeError("Expected 'name' to be specified.") self.name = key_to_value['name'] - if not IsValidLineName(self.name): + if not xconfig_utils.IsValidLineName(self.name): raise RuntimeError("Invalid value: name={0}".format(key_to_value['name'])) # the following, which should be overridden in the child class, sets @@ -53,7 +52,7 @@ def SetConfigs(self, key_to_value, all_layers): if not key in self.config: raise RuntimeError("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) - self.config[key] = ConvertValueToType(key, type(self.config[key]), value) + self.config[key] = xconfig_utils.ConvertValueToType(key, type(self.config[key]), value) self.descriptors = dict() @@ -119,10 +118,10 @@ def NormalizeDescriptors(self): # of type XconfigLayerBase) so that it can work out a list of the names of # other layers, and get dimensions from them. def ConvertToDescriptor(self, descriptor_string, all_layers): - prev_names = GetPrevNames(all_layers, self) - tokens = TokenizeDescriptor(descriptor_string, prev_names) + prev_names = xconfig_utils.GetPrevNames(all_layers, self) + tokens = xconfig_utils.TokenizeDescriptor(descriptor_string, prev_names) pos = 0 - (descriptor, pos) = ParseNewDescriptor(tokens, pos, prev_names) + (descriptor, pos) = xconfig_utils.ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: @@ -133,14 +132,14 @@ def ConvertToDescriptor(self, descriptor_string, all_layers): # Returns the dimension of a Descriptor object. # This is a convenience function used in SetConfigs. def GetDimForDescriptor(self, descriptor, all_layers): - layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) + layer_to_dim_func = lambda name: xconfig_utils.GetDimFromLayerName(all_layers, self, name) return descriptor.Dim(layer_to_dim_func) # Returns the 'final' string form of a Descriptor object, as could be used # in config files. # This is a convenience function provided for use in child classes; def GetStringForDescriptor(self, descriptor, all_layers): - layer_to_string_func = lambda name: GetStringFromLayerName(all_layers, self, name) + layer_to_string_func = lambda name: xconfig_utils.GetStringFromLayerName(all_layers, self, name) return descriptor.ConfigString(layer_to_string_func) # Name() returns the name of this layer, e.g. 'affine1'. It does not @@ -325,7 +324,7 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): # 'prev_names' is a list of the names of preceding lines of the # config file. def ConfigLineToObject(config_line, prev_names = None): - (first_token, key_to_value) = ParseConfigLine(config_line) + (first_token, key_to_value) = xconfig_utils.ParseConfigLine(config_line) return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names) @@ -345,7 +344,7 @@ def ReadXconfigFile(xconfig_filename): line = f.readline() if line == '': break - x = ParseConfigLine(line) + x = xconfig_utils.ParseConfigLine(line) if x is None: continue # line was blank or only comments. 
(first_token, key_to_value) = x diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py similarity index 100% rename from egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py rename to egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 2684e062e8e..56404a0e17d 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -16,8 +16,8 @@ # the following is in case we weren't running this from the normal directory. sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/') -from xconfig_lib import * -from xconfig_layers import * +import xconfig_utils +import xconfig_layers def GetArgs(): @@ -226,7 +226,7 @@ def WriteConfigFiles(config_dir, all_layers): def Main(): args = GetArgs() BackUpXconfigFile(args.xconfig_file, args.config_dir) - all_layers = ReadXconfigFile(args.xconfig_file) + all_layers = xconfig_layers.ReadXconfigFile(args.xconfig_file) WriteExpandedXconfigFiles(args.config_dir, all_layers) WriteConfigFiles(args.config_dir, all_layers) From 0e294a4726805789f6c92fad6ce7613214f8660f Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 23:37:11 -0400 Subject: [PATCH 08/12] Add output layer with affine component --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 133 +++++++++++++++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 11 +- 2 files changed, 137 insertions(+), 7 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 7af70d96ae8..5a7301696c8 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -291,13 +291,138 @@ def GetFullConfig(self): # note: each value of self.descriptors is (descriptor, dim, # normalized-string, output-string). # by 'output-string' we mean a string that can appear in - # config-files, i.e. it contains the 'final' names of - descriptor_output_str = self.descriptors['input'][3] + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_str = self.descriptors['input'][3] for config_name in [ 'ref', 'all' ]: ans.append( (config_name, 'output-node name={0} input={1}'.format( - self.name, descriptor_output_str))) + self.name, descriptor_final_str))) + return ans + + +# This class is for lines like +# 'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))' +# By default this includes a log-softmax component. The parameters are initialized to zero, as +# this is best for output layers. +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer, will normally equal the number of pdfs.] +# include-log-softmax=true [setting it to false will omit the log-softmax component- useful for chain +# models.] +# objective-type=linear [the only other choice currently is 'quadratic', for use in regression +# problems] + +# learning-rate-factor=1.0 [Learning rate factor for the final affine component, multiplies the +# standard learning rate. normally you'll leave this as-is, but for +# xent regularization output layers for chain models you'll want to set +# learning-rate-factor=(0.5/xent_regularize), normally +# learning-rate-factor=5.0 since xent_regularize is normally 0.1. 
+# presoftmax-scale-file='' [If set, a filename for a vector that will be used to scale the output +# of the affine component before the log-softmax (if +# include-log-softmax=true), or before the output (if not). This is +# helpful to avoid instability in training due to some classes having +# much more data than others. The way we normally create this vector +# is to take the priors of the classes to the power -0.25 and rescale +# them so the average is 1.0. This factor -0.25 is referred to +# as presoftmax_prior_scale_power in scripts.] +# In the scripts this would normally be set to config_dir/presoftmax_prior_scale.vec +class XconfigOutputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'output-layer' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]', 'dim':-1, 'include-log-softmax':True, + 'objective-type':'linear', 'learning-rate-factor':1.0, + 'include-log-softmax':True, 'presoftmax-scale-file':'' } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise RuntimeError("In output-layer, dim has invalid value {0}".format(self.config['dim'])) + if self.config['objective-type'] != 'linear' and self.config['objective_type'] != 'quadratic': + raise RuntimeError("In output-layer, objective-type has invalid value {0}".format( + self.config['objective-type'])) + if self.config['learning-rate-factor'] <= 0.0: + raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format( + self.config['learning-rate-factor'])) + + pass # nothing to check; descriptor-parsing can't happen in this function. + + + # you cannot access the output of this layer from other layers... see + # comment in OutputName for the reason why. + def Qualifiers(self): + return [] + + def OutputName(self, qualifier = None): + # Note: nodes of type output-node in nnet3 may not be accessed in Descriptors, + # so calling this with qualifier=None doesn't make sense. But it might make + # sense to make the output of the softmax layer and/or the output of the + # affine layer available as inputs to other layers, in some circumstances. + # we'll implement that when it's needed. + raise RuntimeError("Outputs of output-layer may not be used by other layers") + + def OutputDim(self, qualifier = None): + # see comment in OutputName(). + raise RuntimeError("Outputs of output-layer may not be used by other layers") + + def GetFullConfig(self): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. 
+ descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + objective_type = self.config['objective-type'] + learning_rate_factor = self.config['learning-rate-factor'] + include_log_softmax = self.config['include-log-softmax'] + presoftmax_scale_file = self.config['presoftmax-scale-file'] + + for config_name in [ 'ref', 'all' ]: + # First the affine node. + line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' + 'output-dim={2} param-stddev=0 bias-stddev=0 '.format( + self.name, input_dim, output_dim) + + ('learning-rate-factor={0} '.format(learning_rate_factor) + if learning_rate_factor != 1.0 else '')) + ans.append((config_name, line)) + line = ('component-node name={0}.affine component={0}.affine input={1}'.format( + self.name, descriptor_final_string)) + ans.append((config_name, line)) + cur_node = '{0}.affine'.format(descriptor_final_string) + if presoftmax_scale_file != '' and config_name == 'all': + # don't use the presoftmax-scale in 'ref.config' since that file won't exist at the + # time we evaluate it. (ref.config is used to find the left/right context). + line = ('component name={0}.fixed-scale type=FixedScaleComponent scales={1}'.format( + self.name, presoftmax_scale_file)) + ans.append((config_name, line)) + line = ('component-node name={0}.fixed-scale component={0}.fixed-scale input={1}'.format( + self.name, cur_node)) + ans.append((config_name, line)) + cur_node = '{0}.fixed-scale'.format(self.name) + if include_log_softmax: + line = ('component name={0}.log-softmax type=LogSoftmaxComponent dim={1}'.format( + self.name, output_dim)) + ans.append((config_name, line)) + line = ('component-node name={0}.log-softmax component={0}.log-softmax input={1}'.format( + self.name, cur_node)) + ans.append((config_name, line)) + cur_node = '{0}.log-softmax'.format(self.name) + line = ('output-node name={0} input={0}.log-softmax'.format(self.name, cur_node)) + ans.append((config_name, line)) return ans @@ -312,6 +437,8 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigInputLayer(first_token, key_to_value, prev_names) elif first_token == 'output': return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) + elif first_token == 'output-layer': + return XconfigOutputLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 56404a0e17d..50ad3d4d800 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -196,9 +196,10 @@ def WriteConfigFiles(config_dir, all_layers): for config_basename, line in pairs: config_basename_to_lines[config_basename].append(line) except Exception as e: - sys.exit('{0}: error producing config lines from xconfig ' + print('{0}: error producing config lines from xconfig ' 'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer), - repr(e))) + repr(e)), file=sys.stderr) + raise(e) # currently we don't expect any of the GetFullConfig functions to output to # config-basename 'layer1'... 
currently we just copy this from @@ -216,8 +217,9 @@ def WriteConfigFiles(config_dir, all_layers): print(line, file=f) f.close() except Exception as e: - sys.exit('{0}: error writing to config file {1}: error is {2}'.format( - sys.argv[0], filename, repr(e))) + print('{0}: error writing to config file {1}: error is {2}'.format( + sys.argv[0], filename, repr(e)), file=sys.stderr) + raise e @@ -238,3 +240,4 @@ def Main(): # test: # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo From 7a05e2265a3e7b897769ca686bb0f903e751dcc9 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 01:09:09 -0400 Subject: [PATCH 09/12] Add support for common layer types such as relu+renorm. --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 124 ++++++++++++++++-- egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py | 6 + egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 2 + 3 files changed, 122 insertions(+), 10 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 5a7301696c8..e71b36cd63e 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -349,8 +349,6 @@ def CheckConfigs(self): raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format( self.config['learning-rate-factor'])) - pass # nothing to check; descriptor-parsing can't happen in this function. - # you cannot access the output of this layer from other layers... see # comment in OutputName for the reason why. @@ -370,13 +368,6 @@ def OutputDim(self, qualifier = None): raise RuntimeError("Outputs of output-layer may not be used by other layers") def GetFullConfig(self): - # the input layers need to be printed in 'init.config' (which - # initializes the neural network prior to the LDA), in 'ref.config', - # which is a version of the config file used for getting left and right - # context (it doesn't read anything for the LDA-like transform and/or - # presoftmax-prior-scale components) - # In 'full.config' we write everything, this is just for reference, - # and also for cases where we don't use the LDA-like transform. ans = [] # note: each value of self.descriptors is (descriptor, dim, @@ -391,6 +382,9 @@ def GetFullConfig(self): include_log_softmax = self.config['include-log-softmax'] presoftmax_scale_file = self.config['presoftmax-scale-file'] + + # note: ref.config is used only for getting the left-context and right-context + # of the network; all.config is where we put the actual network definition. for config_name in [ 'ref', 'all' ]: # First the affine node. line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' @@ -402,7 +396,7 @@ def GetFullConfig(self): line = ('component-node name={0}.affine component={0}.affine input={1}'.format( self.name, descriptor_final_string)) ans.append((config_name, line)) - cur_node = '{0}.affine'.format(descriptor_final_string) + cur_node = '{0}.affine'.format(self.name) if presoftmax_scale_file != '' and config_name == 'all': # don't use the presoftmax-scale in 'ref.config' since that file won't exist at the # time we evaluate it. (ref.config is used to find the left/right context). 
@@ -426,6 +420,114 @@ def GetFullConfig(self): return ans +# This class is for lines like +# 'relu-renorm-layer name=layer1 dim=1024 input=Append(-3,0,3)' +# or: +# 'sigmoid-layer name=layer1 dim=1024 input=Append(-3,0,3)' +# Here, the name of the layer itself dictates the sequence of nonlinearities +# that are applied; the name should contain some combination of 'relu', 'renorm', +# 'sigmoid' and 'tanh', and these nonlinearities will be added after the +# affine component. +# +# The dimension specified is the output dim; the input dim is worked out from the input descriptor. +# This class supports only nonlinearity types that do not change the dimension; we can create +# another layer type to enable the use p-norm and similar dimension-reducing nonlinearities. +# +# See other configuration values below. +# +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer, e.g. 1024] +# self-repair-scale=1.0e-05 [Affects relu, sigmoid and tanh layers.] +# +# Configuration values that we might one day want to add here, but which we +# don't yet have, include target-rms (affects 'renorm' component). +class XconfigSimpleLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + # Here we just list some likely combinations.. you can just add any + # combinations you want to use, to this list. + assert first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', + 'tanh-layer' ] + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]', 'dim':-1, 'self-repair-scale':1.0e-05 } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise RuntimeError("In {0}, dim has invalid value {1}".format(self.layer_type, + self.config['dim'])) + if self.config['self-repair-scale'] < 0.0 or self.config['self-repair-scale'] > 1.0: + raise RuntimeError("In {0}, objective-type has invalid value {0}".format( + self.layer_type, self.config['self-repair-scale'])) + + def OutputName(self, qualifier = None): + assert qualifier == None + + split_layer_name = self.layer_type.split('-') + assert split_layer_name[-1] == 'layer' + last_nonlinearity = split_layer_name[-2] + # return something like: layer3.renorm + return '{0}.{1}'.format(self.name, last_nonlinearity) + + def OutputDim(self, qualifier = None): + return self.config['dim'] + + def GetFullConfig(self): + + ans = [] + + split_layer_name = self.layer_type.split('-') + assert split_layer_name[-1] == 'layer' + nonlinearities = split_layer_name[:-1] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + self_repair_scale = self.config['self-repair-scale'] + + for config_name in [ 'ref', 'all' ]: + # First the affine node. 
+ line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' + 'output-dim={2} '.format(self.name, input_dim, output_dim)) + ans.append((config_name, line)) + line = ('component-node name={0}.affine component={0}.affine input={1}'.format( + self.name, descriptor_final_string)) + ans.append((config_name, line)) + cur_node = '{0}.affine'.format(self.name) + + for nonlinearity in nonlinearities: + if nonlinearity == 'relu': + line = ('component name={0}.{1} type=RectifiedLinearComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'sigmoid': + line = ('component name={0}.{1} type=SigmoidComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'tanh': + line = ('component name={0}.{1} type=TanhComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'renorm': + line = ('component name={0}.{1} type=NormalizeComponent dim={2} '.format( + self.name, nonlinearity, output_dim)) + else: + raise RuntimeError("Unknown nonlinearity type: {0}".format(nonlinearity)) + ans.append((config_name, line)) + line = 'component-node name={0}.{1} component={0}.{1} input={2}'.format( + self.name, nonlinearity, cur_node) + ans.append((config_name, line)) + cur_node = '{0}.{1}'.format(self.name, nonlinearity) + return ans + + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. @@ -439,6 +541,8 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) elif first_token == 'output-layer': return XconfigOutputLayer(first_token, key_to_value, prev_names) + elif first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', 'tanh-layer' ]: + return XconfigSimpleLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py index 782e6ebd3e1..5744ec4fc46 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py @@ -27,6 +27,12 @@ def GetPrevNames(all_layers, current_layer): if layer is current_layer: break prev_names.append(layer.Name()) + prev_names_set = set() + for name in prev_names: + if name in prev_names_set: + raise RuntimeError("{0}: Layer name {1} is used more than once.".format( + sys.argv[0], name)) + prev_names_set.add(name) return prev_names # [utility function used in xconfig_layers.py] diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 50ad3d4d800..90e13cb46e2 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -241,3 +241,5 @@ def Main(): # test: # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') 
>xconfig; ./xconfig_to_configs.py xconfig foo From b5c6175ec3f1bae605d1b7e294cedc48c920783c Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 01:29:01 -0400 Subject: [PATCH 10/12] Remove unused config. --- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 90e13cb46e2..5a7bc767c8a 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -25,10 +25,6 @@ def GetArgs(): parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files ' 'for neural net creation and training', epilog='Search egs/*/*/local/nnet3/*sh for examples') - - parser.add_argument('--self-repair-scale-nonlinearity', type=float, - help='A non-zero value activates the self-repair mechanism in ' - 'nonlinearities (larger -> faster self-repair)', default=1.0e-05) parser.add_argument('xconfig_file', help='Filename of input xconfig file') parser.add_argument('config_dir', @@ -44,10 +40,6 @@ def GetArgs(): def CheckArgs(args): if not os.path.exists(args.config_dir): os.makedirs(args.config_dir) - if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: - sys.exit('{0}: invalid option --self-repair-scale-nonlinearity={1}'.format( - sys.argv[0], args.self_repair_scale_nonlinearity)) - return args From 6592d945ad1ebc740b395774e36a9b0e71f5c989 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 02:01:50 -0400 Subject: [PATCH 11/12] Add support for fixed-affine-layer --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 82 ++++++++++++++++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 4 + 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index e71b36cd63e..2990e290152 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -528,6 +528,84 @@ def GetFullConfig(self): return ans +# This class is for lines like +# 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat' +# +# The output dimension of the layer may be specified via 'dim=xxx', but if not specified, +# the dimension defaults to the same as the input. Note: we don't attempt to read that +# file at the time the config is created, because in the recipes, that file is created +# after the config files. +# +# See other configuration values below. +# +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer; defaults to the same as the input dim.] +# affine-transform-file='' [Must be specified.] +# +# Configuration values that we might one day want to add here, but which we +# don't yet have, include target-rms (affects 'renorm' component). +class XconfigFixedAffineLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'fixed-affine-layer' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. 
+ self.config = { 'input':'[-1]', 'dim':-1, 'affine-transform-file':'' } + + def CheckConfigs(self): + if self.config['affine-transform-file'] == '': + raise RuntimeError("In fixed-affine-layer, affine-transform-file must be set.") + + def OutputName(self, qualifier = None): + assert qualifier == None + return self.name + + def OutputDim(self, qualifier = None): + output_dim = self.config['dim'] + # If not set, the output-dim defaults to the input-dim. + if output_dim <= 0: + output_dim = self.descriptors['input'][1] + return output_dim + + def GetFullConfig(self): + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + transform_file = self.config['affine-transform-file'] + if output_dim <= 0: + output_dim = input_dim + + + # to init.config we write an output-node with the name 'output' and + # with a Descriptor equal to the descriptor that's the input to this + # layer. This will be used to accumulate stats to learn the LDA transform. + line = 'output-node name=output input={0}'.format(descriptor_final_string) + ans.append(('init', line)) + + # write the 'real' component to all.config + line = 'component name={0} type=FixedAffineComponent matrix={1}'.format( + self.name, transform_file) + ans.append(('all', line)) + # write a random version of the component, with the same dims, to ref.config + line = 'component name={0} type=FixedAffineComponent input-dim={1} output-dim={2}'.format( + self.name, input_dim, output_dim) + ans.append(('ref', line)) + # the component-node gets written to all.config and ref.config. + line = 'component-node name={0} component={0} input={1}'.format( + self.name, descriptor_final_string) + ans.append(('all', line)) + ans.append(('ref', line)) + return ans + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. 
@@ -543,10 +621,12 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigOutputLayer(first_token, key_to_value, prev_names) elif first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', 'tanh-layer' ]: return XconfigSimpleLayer(first_token, key_to_value, prev_names) + elif first_token == 'fixed-affine-layer': + return XconfigFixedAffineLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + - ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) + ' '.join(['{0}={1}'.format(x,y) for x,y in key_to_value.items()])) # Uses ParseConfigLine() to turn a config line that has been parsed into diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 5a7bc767c8a..60a2ba384b8 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -235,3 +235,7 @@ def Main(): # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'input dim=100 name=ivector'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + + From e99cea915d1d5020011b7f702cf19c65b800b8ac Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 02:23:29 -0400 Subject: [PATCH 12/12] Small fix to example command. --- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 60a2ba384b8..bd841aae1f2 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -236,6 +236,6 @@ def Main(): # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo -# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'input dim=100 name=ivector'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo +# mkdir -p foo; (echo 'input dim=100 name=ivector'; echo 'input dim=40 name=input'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo
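
# For illustration: a minimal sketch of driving the xconfig library directly, mirroring what
# Main() in xconfig_to_configs.py does after argument parsing. It uses only functions shown
# above (ReadXconfigFile, NormalizeDescriptors, GetFullConfig); the 'xconfig' filename matches
# the test commands above, and the library path is an assumption about where this is run from.
#
#   from __future__ import print_function
#   import sys
#   sys.path.insert(0, 'steps/nnet3/libs/')
#   import xconfig_layers
#
#   all_layers = xconfig_layers.ReadXconfigFile('xconfig')
#   for layer in all_layers:
#       layer.NormalizeDescriptors()     # resolve '[-1]', bare offsets etc. into full Descriptors
#   for layer in all_layers:
#       for config_basename, line in layer.GetFullConfig():
#           print(config_basename, line)  # prints pairs like: all component name=output.affine ...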