# egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py
# Layer-type-specific code for the xconfig -> config conversion; generic
# lower-level helpers live in xconfig_utils.py.

from __future__ import print_function
import subprocess
import logging
import math
import re
import sys
import traceback
import time
import argparse
import xconfig_utils


# A base-class for classes representing layers of xconfig files.
# This mainly just sets self.layer_type, self.name and self.config, and
# parses any config values that are Descriptors (storing the parsed forms
# in self.descriptors).
class XconfigLayerBase(object):
    # Constructor.
    # 'first_token' is the first token on the xconfig line, e.g. 'affine-layer'.
    # 'key_to_value' is a dict like:
    # { 'name':'affine1', 'input':'Append(0, 1, 2, ReplaceIndex(ivector, t, 0))', 'dim':'1024' }.
    # The only required and 'special' values that are dealt with directly at
    # this level are 'name' and 'input'.  The rest are put in self.config and
    # are dealt with by the child classes' init functions.
    # 'all_layers' is an array of objects inheriting XconfigLayerBase for all
    # previously parsed layers.
    def __init__(self, first_token, key_to_value, all_layers):
        self.layer_type = first_token
        if not 'name' in key_to_value:
            raise RuntimeError("Expected 'name' to be specified.")
        self.name = key_to_value['name']
        if not xconfig_utils.IsValidLineName(self.name):
            raise RuntimeError("Invalid value: name={0}".format(key_to_value['name']))

        # the following, which should be overridden in the child class, sets
        # default config parameters in self.config.
        self.SetDefaultConfigs()
        # The following is not to be reimplemented in child classes;
        # it sets the config values to those specified by the user, and
        # parses any Descriptors.
        self.SetConfigs(key_to_value, all_layers)
        # the following, which should be overridden in the child class, checks
        # that the config parameters that have been set are reasonable.
        self.CheckConfigs()


    # Copies user-supplied key/value pairs into self.config (type-converting
    # them to match the defaults' types), then parses all Descriptor-valued
    # configs.  We broke this code out of __init__ for clarity.
    def SetConfigs(self, key_to_value, all_layers):
        # the child-class constructor will deal with the configuration values
        # in a more specific way.
        for key,value in key_to_value.items():
            if key != 'name':
                if not key in self.config:
                    raise RuntimeError("Configuration value {0}={1} was not expected in "
                                       "layer of type {2}".format(key, value, self.layer_type))
                self.config[key] = xconfig_utils.ConvertValueToType(key, type(self.config[key]), value)


        self.descriptors = dict()
        # NOTE(review): self.descriptor_dims appears to be unused (dims are
        # stored inside the self.descriptors tuples instead) -- confirm.
        self.descriptor_dims = dict()
        # Parse Descriptors and get their dims and their 'final' string form.
        # Put them as 4-tuples (descriptor, dim, normalized-string, final-string)
        # in self.descriptors[key]
        for key in self.GetDescriptorConfigs():
            if not key in self.config:
                raise RuntimeError("{0}: object of type {1} needs to override "
                                   "GetDescriptorConfigs()".format(sys.argv[0],
                                                                   str(type(self))))
            descriptor_string = self.config[key]  # input string.
            assert isinstance(descriptor_string, str)
            desc = self.ConvertToDescriptor(descriptor_string, all_layers)
            desc_dim = self.GetDimForDescriptor(desc, all_layers)
            desc_norm_str = desc.str()
            # desc_output_str contains the "final" component names, those that
            # appear in the actual config file (i.e. not names like
            # 'layer.qualifier'); that's how it differs from desc_norm_str.
            # Note: it's possible that the two strings might be the same in
            # many, even most, cases-- it depends whether OutputName(self, qualifier)
            # returns self.Name() + '.' + qualifier when qualifier is not None.
            # That's up to the designer of the layer type.
            desc_output_str = self.GetStringForDescriptor(desc, all_layers)
            self.descriptors[key] = (desc, desc_dim, desc_norm_str, desc_output_str)
            # the following helps to check the code by parsing the normalized
            # string again and making sure it round-trips unchanged.
            desc2 = self.ConvertToDescriptor(desc_norm_str, all_layers)
            desc_norm_str2 = desc2.str()
            # if the following ever fails we'll have to do some debugging.
            if desc_norm_str != desc_norm_str2:
                raise RuntimeError("Likely code error: '{0}' != '{1}'".format(
                    desc_norm_str, desc_norm_str2))

    # This function converts 'this' to a string which could be printed to an
    # xconfig file; in xconfig_to_configs.py we actually expand all the lines to
    # strings and write it as xconfig.expanded as a reference (so users can
    # see any defaults).
    def str(self):
        ans = '{0} name={1}'.format(self.layer_type, self.name)
        ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key])
                                for key in sorted(self.config.keys())])
        return ans

    def __str__(self):
        return self.str()


    # This function converts any config variables in self.config which
    # correspond to Descriptors, into a 'normalized form' derived from parsing
    # them as Descriptors, replacing things like [-1] with the actual layer
    # names, and regenerating them as strings.  We stored this when the
    # object was initialized, in self.descriptors; this function just copies
    # them back to the config.
    def NormalizeDescriptors(self):
        # note: the loop variable shadows the builtin 'tuple'; it is a 4-tuple
        # (descriptor, dim, normalized-string, final-string).
        for key,tuple in self.descriptors.items():
            self.config[key] = tuple[2]  # desc_norm_str

    # This function, which is a convenience function intended to be called from
    # child classes, converts a string representing a descriptor
    # ('descriptor_string') into an object of type Descriptor, and returns it.
    # It needs 'self' and 'all_layers' (where 'all_layers' is a list of objects
    # of type XconfigLayerBase) so that it can work out a list of the names of
    # other layers, and get dimensions from them.
    def ConvertToDescriptor(self, descriptor_string, all_layers):
        prev_names = xconfig_utils.GetPrevNames(all_layers, self)
        tokens = xconfig_utils.TokenizeDescriptor(descriptor_string, prev_names)
        pos = 0
        (descriptor, pos) = xconfig_utils.ParseNewDescriptor(tokens, pos, prev_names)
        # note: 'pos' should point to the 'end of string' marker
        # that terminates 'tokens'.
        if pos != len(tokens) - 1:
            raise RuntimeError("Parsing Descriptor, saw junk at end: " +
                               ' '.join(tokens[pos:-1]))
        return descriptor

    # Returns the dimension of a Descriptor object.
    # This is a convenience function used in SetConfigs.
    def GetDimForDescriptor(self, descriptor, all_layers):
        layer_to_dim_func = lambda name: xconfig_utils.GetDimFromLayerName(all_layers, self, name)
        return descriptor.Dim(layer_to_dim_func)

    # Returns the 'final' string form of a Descriptor object, as could be used
    # in config files.
    # This is a convenience function provided for use in child classes.
    def GetStringForDescriptor(self, descriptor, all_layers):
        layer_to_string_func = lambda name: xconfig_utils.GetStringFromLayerName(all_layers, self, name)
        return descriptor.ConfigString(layer_to_string_func)

    # Name() returns the name of this layer, e.g. 'affine1'.  It does not
    # necessarily correspond to a component name.
    def Name(self):
        return self.name

    ###### Functions that might be overridden by the child class: #####

    # child classes should override this; it must set self.config to a dict
    # whose keys are all the allowed config names and whose values have the
    # correct types (types are used for conversion of user-supplied values).
    def SetDefaultConfigs(self):
        raise RuntimeError("Child classes must override SetDefaultConfigs().")

    # child classes should override this to validate self.config; the default
    # does no checking.
    def CheckConfigs(self):
        pass

    # This function, which may be (but usually will not have to be) overridden
    # by child classes, returns a list of keys/names of config variables that
    # will be interpreted as Descriptors.  It is used in the function
    # 'NormalizeDescriptors()'.  This implementation will work for layer types
    # whose only Descriptor-valued config is 'input'.
    #
    # If a child class adds more config variables that are interpreted as
    # descriptors (e.g. to read auxiliary inputs), or does not have an input
    # (e.g. the XconfigInputLayer), it should override this function's
    # implementation to something like: `return ['input', 'input2']`
    def GetDescriptorConfigs(self):
        return [ 'input' ]

    # Returns a list of all qualifiers (meaning auxiliary outputs) that this
    # layer supports.  These are either 'None' for the regular output, or a
    # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that
    # the layer might provide.  Most layer types will not need to override this.
    def Qualifiers(self):
        return [ None ]

    # Called with qualifier == None, this returns the component-node name of the
    # principal output of the layer (or if you prefer, the text form of a
    # descriptor that gives you such an output; such as Append(some_node,
    # some_other_node)).
    # The 'qualifier' argument is a text value that is designed for extensions
    # to layers that have additional auxiliary outputs.  For example, to implement
    # a highway LSTM you need the memory-cell of a layer, so you might allow
    # qualifier='memory_cell' for such a layer type, and it would return the
    # component node or a suitable Descriptor: something like 'lstm3.c_t'
    def OutputName(self, qualifier = None):
        raise RuntimeError("Child classes must override OutputName()")

    # The dimension that this layer outputs.  The 'qualifier' parameter is for
    # layer types which support auxiliary outputs.
    def OutputDim(self, qualifier = None):
        raise RuntimeError("Child classes must override OutputDim()")

    # This function returns lines destined for the 'full' config format, as
    # would be read by the C++ programs.
    # Since the program xconfig_to_configs.py writes several config files, this
    # function returns a list of pairs of the form (config_file_basename, line),
    # e.g. something like
    # [ ('init', 'input-node name=input dim=40'),
    #   ('ref', 'input-node name=input dim=40') ]
    # which would be written to config_dir/init.config and config_dir/ref.config.
    def GetFullConfig(self):
        raise RuntimeError("Child classes must override GetFullConfig()")


# This class is for lines like
#  'input name=input dim=40'
# or
#  'input name=ivector dim=100'
# in the config file.
class XconfigInputLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'input'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)


    def SetDefaultConfigs(self):
        # dim=-1 means "unset"; CheckConfigs insists the user supplies it.
        self.config = { 'dim':-1 }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("Dimension of input-layer '{0}' is not set".format(self.name))

    def GetDescriptorConfigs(self):
        return []  # there is no 'input' field in self.config.

    def OutputName(self, qualifier = None):
        assert qualifier is None
        return self.name

    def OutputDim(self, qualifier = None):
        assert qualifier is None
        return self.config['dim']

    def GetFullConfig(self):
        # the input layers need to be printed in 'init.config' (which
        # initializes the neural network prior to the LDA), in 'ref.config',
        # which is a version of the config file used for getting left and right
        # context (it doesn't read anything for the LDA-like transform and/or
        # presoftmax-prior-scale components).
        # In 'all.config' we write everything; this is just for reference,
        # and also for cases where we don't use the LDA-like transform.
        ans = []
        for config_name in [ 'init', 'ref', 'all' ]:
            ans.append( (config_name,
                         'input-node name={0} dim={1}'.format(self.name,
                                                              self.config['dim'])))
        return ans



# This class is for lines like
#  'output name=output input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
# This is for outputs that are not really output "layers" (there is no affine
# transform or nonlinearity), they just directly map to an output-node in nnet3.
class XconfigTrivialOutputLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'output'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]' }

    def CheckConfigs(self):
        pass  # nothing to check; descriptor-parsing can't happen in this function.

    def OutputName(self, qualifier = None):
        assert qualifier is None
        return self.name

    def OutputDim(self, qualifier = None):
        assert qualifier is None
        # note: each value of self.descriptors is (descriptor, dim, normalized-string, output-string).
        return self.descriptors['input'][1]

    def GetFullConfig(self):
        # ref.config is used only for getting left/right context; all.config
        # holds the actual network definition.  This node has no trainable
        # parameters, so it does not appear in init.config.
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'output-string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_str = self.descriptors['input'][3]

        for config_name in [ 'ref', 'all' ]:
            ans.append( (config_name,
                         'output-node name={0} input={1}'.format(
                             self.name, descriptor_final_str)))
        return ans


# This class is for lines like
#  'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
# By default this includes a log-softmax component.  The parameters are
# initialized to zero, as this is best for output layers.
# Parameters of the class, and their defaults:
#   input='[-1]'             [Descriptor giving the input of the layer.]
#   dim=-1                   [Output dimension of layer, will normally equal the number of pdfs.]
#   include-log-softmax=true [setting it to false will omit the log-softmax component-
#                             useful for chain models.]
#   objective-type=linear    [the only other choice currently is 'quadratic', for
#                             use in regression problems]
#   learning-rate-factor=1.0 [Learning rate factor for the final affine component;
#                             multiplies the standard learning rate.  Normally you'll
#                             leave this as-is, but for xent regularization output
#                             layers for chain models you'll want to set
#                             learning-rate-factor=(0.5/xent_regularize), normally
#                             learning-rate-factor=5.0 since xent_regularize is
#                             normally 0.1.]
#   presoftmax-scale-file='' [If set, a filename for a vector that will be used to
#                             scale the output of the affine component before the
#                             log-softmax (if include-log-softmax=true), or before
#                             the output (if not).  This is helpful to avoid
#                             instability in training due to some classes having
#                             much more data than others.  The way we normally
#                             create this vector is to take the priors of the
#                             classes to the power -0.25 and rescale them so the
#                             average is 1.0.  This factor -0.25 is referred to
#                             as presoftmax_prior_scale_power in scripts.]
# (In the scripts, presoftmax-scale-file would normally be set to
# config_dir/presoftmax_prior_scale.vec.)
class XconfigOutputLayer(XconfigLayerBase):
    # This class is for lines like
    #  'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
    # i.e. a trainable output layer: an affine component (initialized to zero,
    # which is best for output layers), optionally a fixed scale read from
    # presoftmax-scale-file, optionally a log-softmax, and the output-node.
    # See the comment block preceding this class for the config parameters.
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'output-layer'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        # [fix: the original dict listed 'include-log-softmax' twice.]
        self.config = { 'input':'[-1]', 'dim':-1, 'include-log-softmax':True,
                        'objective-type':'linear', 'learning-rate-factor':1.0,
                        'presoftmax-scale-file':'' }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("In output-layer, dim has invalid value {0}".format(self.config['dim']))
        # [fix: the second test used the non-existent key 'objective_type'
        # (underscore), which would have raised KeyError for valid configs.]
        if self.config['objective-type'] != 'linear' and self.config['objective-type'] != 'quadratic':
            raise RuntimeError("In output-layer, objective-type has invalid value {0}".format(
                self.config['objective-type']))
        if self.config['learning-rate-factor'] <= 0.0:
            raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format(
                self.config['learning-rate-factor']))


    # you cannot access the output of this layer from other layers... see
    # comment in OutputName for the reason why.
    def Qualifiers(self):
        return []

    def OutputName(self, qualifier = None):
        # Note: nodes of type output-node in nnet3 may not be accessed in Descriptors,
        # so calling this with qualifier=None doesn't make sense.  But it might make
        # sense to make the output of the softmax layer and/or the output of the
        # affine layer available as inputs to other layers, in some circumstances.
        # we'll implement that when it's needed.
        raise RuntimeError("Outputs of output-layer may not be used by other layers")

    def OutputDim(self, qualifier = None):
        # see comment in OutputName().
        raise RuntimeError("Outputs of output-layer may not be used by other layers")

    def GetFullConfig(self):
        """Returns a list of (config-file-basename, line) pairs defining this
        layer: the affine component, the optional fixed-scale and log-softmax
        components, and the output-node."""
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        learning_rate_factor = self.config['learning-rate-factor']
        include_log_softmax = self.config['include-log-softmax']
        presoftmax_scale_file = self.config['presoftmax-scale-file']
        # TODO(review): self.config['objective-type'] is validated in
        # CheckConfigs but never written to the output-node here; confirm
        # whether an 'objective=...' option should be emitted.

        # note: ref.config is used only for getting the left-context and right-context
        # of the network; all.config is where we put the actual network definition.
        for config_name in [ 'ref', 'all' ]:
            # First the affine node.  param-stddev=0 bias-stddev=0 gives the
            # zero initialization that is best for output layers.
            line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} '
                    'output-dim={2} param-stddev=0 bias-stddev=0 '.format(
                        self.name, input_dim, output_dim) +
                    ('learning-rate-factor={0} '.format(learning_rate_factor)
                     if learning_rate_factor != 1.0 else ''))
            ans.append((config_name, line))
            line = ('component-node name={0}.affine component={0}.affine input={1}'.format(
                self.name, descriptor_final_string))
            ans.append((config_name, line))
            # cur_node tracks the most recently added node, which feeds the
            # next component-node and, finally, the output-node.
            cur_node = '{0}.affine'.format(self.name)
            if presoftmax_scale_file != '' and config_name == 'all':
                # don't use the presoftmax-scale in 'ref.config' since that file won't
                # exist at the time we evaluate it.  (ref.config is used to find the
                # left/right context).
                line = ('component name={0}.fixed-scale type=FixedScaleComponent scales={1}'.format(
                    self.name, presoftmax_scale_file))
                ans.append((config_name, line))
                line = ('component-node name={0}.fixed-scale component={0}.fixed-scale input={1}'.format(
                    self.name, cur_node))
                ans.append((config_name, line))
                cur_node = '{0}.fixed-scale'.format(self.name)
            if include_log_softmax:
                line = ('component name={0}.log-softmax type=LogSoftmaxComponent dim={1}'.format(
                    self.name, output_dim))
                ans.append((config_name, line))
                line = ('component-node name={0}.log-softmax component={0}.log-softmax input={1}'.format(
                    self.name, cur_node))
                ans.append((config_name, line))
                cur_node = '{0}.log-softmax'.format(self.name)
            # [fix: the original hard-coded input={0}.log-softmax here, which was
            # wrong whenever include-log-softmax=false (and dropped the
            # fixed-scale node in that case); use the tracked cur_node.]
            line = ('output-node name={0} input={1}'.format(self.name, cur_node))
            ans.append((config_name, line))
        return ans


# This class is for lines like
#  'relu-renorm-layer name=layer1 dim=1024 input=Append(-3,0,3)'
# or:
#  'sigmoid-layer name=layer1 dim=1024 input=Append(-3,0,3)'
# Here, the name of the layer itself dictates the sequence of nonlinearities
# that are applied; the name should contain some combination of 'relu', 'renorm',
# 'sigmoid' and 'tanh', and these nonlinearities will be added after the
# affine component.
#
# The dimension specified is the output dim; the input dim is worked out from
# the input descriptor.  This class supports only nonlinearity types that do
# not change the dimension; we can create another layer type to enable the use
# of p-norm and similar dimension-reducing nonlinearities.
#
# Parameters of the class, and their defaults:
#   input='[-1]'              [Descriptor giving the input of the layer.]
#   dim=-1                    [Output dimension of layer, e.g. 1024]
#   self-repair-scale=1.0e-05 [Affects relu, sigmoid and tanh layers.]
#
# Configuration values that we might one day want to add here, but which we
# don't yet have, include target-rms (affects 'renorm' component).
class XconfigSimpleLayer(XconfigLayerBase):
    # This class handles e.g. 'relu-renorm-layer name=layer1 dim=1024': an
    # affine component followed by the sequence of (dimension-preserving)
    # nonlinearities named in the layer type itself.
    def __init__(self, first_token, key_to_value, prev_names = None):
        # Here we just list some likely combinations.. you can just add any
        # combinations you want to use, to this list.
        assert first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer',
                                'tanh-layer' ]
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]', 'dim':-1, 'self-repair-scale':1.0e-05 }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("In {0}, dim has invalid value {1}".format(self.layer_type,
                                                                          self.config['dim']))
        if self.config['self-repair-scale'] < 0.0 or self.config['self-repair-scale'] > 1.0:
            # [fix: the original message said 'objective-type' and used {0}
            # twice, so the offending value was never printed.]
            raise RuntimeError("In {0}, self-repair-scale has invalid value {1}".format(
                self.layer_type, self.config['self-repair-scale']))

    def OutputName(self, qualifier = None):
        assert qualifier == None

        split_layer_name = self.layer_type.split('-')
        assert split_layer_name[-1] == 'layer'
        last_nonlinearity = split_layer_name[-2]
        # return something like: layer3.renorm
        return '{0}.{1}'.format(self.name, last_nonlinearity)

    def OutputDim(self, qualifier = None):
        return self.config['dim']

    def GetFullConfig(self):
        """Returns (config-file-basename, line) pairs for the affine component
        plus one component per nonlinearity named in the layer type."""
        ans = []

        split_layer_name = self.layer_type.split('-')
        assert split_layer_name[-1] == 'layer'
        nonlinearities = split_layer_name[:-1]

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        self_repair_scale = self.config['self-repair-scale']

        for config_name in [ 'ref', 'all' ]:
            # First the affine node.
            line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} '
                    'output-dim={2} '.format(self.name, input_dim, output_dim))
            ans.append((config_name, line))
            line = ('component-node name={0}.affine component={0}.affine input={1}'.format(
                self.name, descriptor_final_string))
            ans.append((config_name, line))
            cur_node = '{0}.affine'.format(self.name)

            for nonlinearity in nonlinearities:
                if nonlinearity == 'relu':
                    line = ('component name={0}.{1} type=RectifiedLinearComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'sigmoid':
                    line = ('component name={0}.{1} type=SigmoidComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'tanh':
                    line = ('component name={0}.{1} type=TanhComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'renorm':
                    line = ('component name={0}.{1} type=NormalizeComponent dim={2} '.format(
                        self.name, nonlinearity, output_dim))
                else:
                    raise RuntimeError("Unknown nonlinearity type: {0}".format(nonlinearity))
                ans.append((config_name, line))
                line = 'component-node name={0}.{1} component={0}.{1} input={2}'.format(
                    self.name, nonlinearity, cur_node)
                ans.append((config_name, line))
                cur_node = '{0}.{1}'.format(self.name, nonlinearity)
        return ans


# This class is for lines like
#  'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'
#
# The output dimension of the layer may be specified via 'dim=xxx', but if not
# specified, the dimension defaults to the same as the input.  Note: we don't
# attempt to read that file at the time the config is created, because in the
# recipes, that file is created after the config files.
#
# Parameters of the class, and their defaults:
#   input='[-1]'             [Descriptor giving the input of the layer.]
#   dim=-1                   [Output dimension of layer; defaults to the same as the input dim.]
#   affine-transform-file='' [Must be specified.]
class XconfigFixedAffineLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'fixed-affine-layer'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]', 'dim':-1, 'affine-transform-file':'' }

    def CheckConfigs(self):
        if self.config['affine-transform-file'] == '':
            raise RuntimeError("In fixed-affine-layer, affine-transform-file must be set.")

    def OutputName(self, qualifier = None):
        assert qualifier == None
        return self.name

    def OutputDim(self, qualifier = None):
        output_dim = self.config['dim']
        # If not set, the output-dim defaults to the input-dim.
        if output_dim <= 0:
            output_dim = self.descriptors['input'][1]
        return output_dim

    def GetFullConfig(self):
        """Returns the config lines for this layer; note that init.config gets
        a temporary output-node used to accumulate LDA stats."""
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        transform_file = self.config['affine-transform-file']
        if output_dim <= 0:
            output_dim = input_dim


        # to init.config we write an output-node with the name 'output' and
        # with a Descriptor equal to the descriptor that's the input to this
        # layer.  This will be used to accumulate stats to learn the LDA transform.
        line = 'output-node name=output input={0}'.format(descriptor_final_string)
        ans.append(('init', line))

        # write the 'real' component to all.config
        line = 'component name={0} type=FixedAffineComponent matrix={1}'.format(
            self.name, transform_file)
        ans.append(('all', line))
        # write a random version of the component, with the same dims, to ref.config
        line = 'component name={0} type=FixedAffineComponent input-dim={1} output-dim={2}'.format(
            self.name, input_dim, output_dim)
        ans.append(('ref', line))
        # the component-node gets written to all.config and ref.config.
        line = 'component-node name={0} component={0} input={1}'.format(
            self.name, descriptor_final_string)
        ans.append(('all', line))
        ans.append(('ref', line))
        return ans

# Converts a line as parsed by ParseConfigLine() into a first
# token e.g. 'input-layer' and a key->value map, into
# an object inherited from XconfigLayerBase.
# 'prev_names' is a list of previous layer names; it's needed
# to parse things like '[-1]' (meaning: the previous layer)
# when they appear in Descriptors.
def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names):
    """Dispatches a parsed xconfig line (first token plus key->value map) to
    the appropriate XconfigLayerBase subclass and returns the constructed
    layer object; raises RuntimeError for an unknown layer type."""
    # table mapping the first token of an xconfig line to the class that
    # implements that layer type.
    layer_classes = {
        'input':              XconfigInputLayer,
        'output':             XconfigTrivialOutputLayer,
        'output-layer':       XconfigOutputLayer,
        'relu-layer':         XconfigSimpleLayer,
        'relu-renorm-layer':  XconfigSimpleLayer,
        'sigmoid-layer':      XconfigSimpleLayer,
        'tanh-layer':         XconfigSimpleLayer,
        'fixed-affine-layer': XconfigFixedAffineLayer }
    if first_token not in layer_classes:
        raise RuntimeError("Error parsing xconfig line (no such layer type): " +
                           first_token + ' ' +
                           ' '.join(['{0}={1}'.format(x,y) for x,y in key_to_value.items()]))
    layer_class = layer_classes[first_token]
    return layer_class(first_token, key_to_value, prev_names)


def ConfigLineToObject(config_line, prev_names = None):
    """Parses one raw xconfig line (via xconfig_utils.ParseConfigLine) and
    turns it into a layer object.  'prev_names' is a list of the names of
    preceding lines of the config file."""
    (first_token, key_to_value) = xconfig_utils.ParseConfigLine(config_line)
    return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names)



# This function reads an xconfig file and returns it as a list of layers
# (usually we use the variable name 'all_layers' elsewhere for this).
# It will die if the xconfig file is empty or if there was
# some error parsing it.
def ReadXconfigFile(xconfig_filename):
    """Reads the xconfig file 'xconfig_filename' and returns it as a list of
    layer objects (usually called 'all_layers' elsewhere).  Calls sys.exit()
    if the file cannot be opened, and raises RuntimeError if it is empty or
    if a line fails to parse."""
    try:
        f = open(xconfig_filename, 'r')
    except Exception as e:
        sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format(
            sys.argv[0], xconfig_filename, repr(e)))
    all_layers = []
    # [fix: the original leaked the file handle when the empty-file error was
    # raised before f.close(); the finally-clause closes it on every path.]
    try:
        for line in f:
            x = xconfig_utils.ParseConfigLine(line)
            if x is None:
                continue  # line was blank or only comments.
            (first_token, key_to_value) = x
            # the next call will raise an easy-to-understand exception if
            # it fails.
            this_layer = ParsedLineToXconfigLayer(first_token,
                                                  key_to_value,
                                                  all_layers)
            all_layers.append(this_layer)
    finally:
        f.close()
    if len(all_layers) == 0:
        raise RuntimeError("{0}: xconfig file '{1}' is empty".format(
            sys.argv[0], xconfig_filename))
    return all_layers


def TestLayers():
    """Self-test: config lines that should round-trip unchanged through
    parsing and str()."""
    # for some config lines that should be printed the same way as they
    # are read, check that this is the case.
    for x in [ 'input name=input dim=30' ]:
        assert str(ConfigLineToObject(x, [])) == x


# ---------------------------------------------------------------------------
# egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py
#
# Copyright 2016  Johns Hopkins University (Author: Daniel Povey).
# License: Apache 2.0.
#
# This library contains various utilities that are involved in the processing
# of xconfig -> config conversion.  It contains "generic" lower-level code
# while xconfig_layers.py contains the code specific to layer types.
from __future__ import print_function
import subprocess
import logging
import math
import re
import sys
import traceback
import time
import argparse


# [utility function used in xconfig_layers.py]
# Given a list of objects of type XconfigLayerBase ('all_layers'),
# including at least the layers preceding 'current_layer' (and maybe
# more layers), return the names of layers preceding 'current_layer'.
# This will be used in parsing expressions like [-1] in descriptors
# (which is an alias for the previous layer).
def GetPrevNames(all_layers, current_layer):
    """Return the names (strings) of the layers preceding 'current_layer'
    in 'all_layers'.  Raises RuntimeError if a layer name is duplicated,
    since that would make name lookup ambiguous."""
    prev_names = []
    for layer in all_layers:
        if layer is current_layer:
            break
        prev_names.append(layer.Name())
    prev_names_set = set()
    for name in prev_names:
        if name in prev_names_set:
            raise RuntimeError("{0}: Layer name {1} is used more than once.".format(
                sys.argv[0], name))
        prev_names_set.add(name)
    return prev_names


# [utility function used in xconfig_layers.py]
# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like
# 'lstm2.memory_cell', into a dimension.  'all_layers' is a vector of objects
# inheriting from XconfigLayerBase.  'current_layer' is provided so that the
# function can make sure not to look in layers that appear *after* this layer
# (because that's not allowed).
def GetDimFromLayerName(all_layers, current_layer, full_layer_name):
    """Return the output dimension of layer (or sub-layer) 'full_layer_name',
    searching only the layers that precede 'current_layer'."""
    assert isinstance(full_layer_name, str)
    split_name = full_layer_name.split('.')
    if len(split_name) == 0:
        raise RuntimeError("Bad layer name: " + full_layer_name)
    layer_name = split_name[0]
    if len(split_name) == 1:
        qualifier = None
    else:
        # we probably expect len(split_name) == 2 in this case,
        # but no harm in allowing dots in the qualifier.
        qualifier = '.'.join(split_name[1:])

    for layer in all_layers:
        if layer is current_layer:
            break
        if layer.Name() == layer_name:
            if not qualifier in layer.Qualifiers():
                raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format(
                    layer_name, qualifier))
            return layer.OutputDim(qualifier)
    # No such layer was found.
    if layer_name in [ layer.Name() for layer in all_layers ]:
        raise RuntimeError("Layer '{0}' was requested before it appeared in "
                           "the xconfig file (circular dependencies or out-of-order "
                           "layers".format(layer_name))
    else:
        raise RuntimeError("No such layer: '{0}'".format(layer_name))


# [utility function used in xconfig_layers.py]
# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like
# 'lstm2.memory_cell', into a descriptor (usually, but not required to be, a
# simple component-node name) that can appear in the generated config file.
# 'all_layers' is a vector of objects inheriting from XconfigLayerBase.
# 'current_layer' is provided so that the function can make sure not to look
# in layers that appear *after* this layer (because that's not allowed).
def GetStringFromLayerName(all_layers, current_layer, full_layer_name):
    """Return the config-file string (component-node name or descriptor) for
    layer (or sub-layer) 'full_layer_name', searching only the layers that
    precede 'current_layer'."""
    assert isinstance(full_layer_name, str)
    split_name = full_layer_name.split('.')
    if len(split_name) == 0:
        raise RuntimeError("Bad layer name: " + full_layer_name)
    layer_name = split_name[0]
    if len(split_name) == 1:
        qualifier = None
    else:
        # we probably expect len(split_name) == 2 in this case,
        # but no harm in allowing dots in the qualifier.
        qualifier = '.'.join(split_name[1:])

    for layer in all_layers:
        if layer is current_layer:
            break
        if layer.Name() == layer_name:
            if not qualifier in layer.Qualifiers():
                raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format(
                    layer_name, qualifier))
            return layer.OutputName(qualifier)
    # No such layer was found.
    if layer_name in [ layer.Name() for layer in all_layers ]:
        raise RuntimeError("Layer '{0}' was requested before it appeared in "
                           "the xconfig file (circular dependencies or out-of-order "
                           "layers".format(layer_name))
    else:
        raise RuntimeError("No such layer: '{0}'".format(layer_name))


# This function, used in converting string values in config lines to
# configuration values in self.config in layers, attempts to
# convert 'string_value' to an instance of dest_type (which is of type Type).
# 'key' is only needed for printing errors.
def ConvertValueToType(key, dest_type, string_value):
    """Convert 'string_value' to an instance of 'dest_type' (one of bool,
    int, float or str).  Raises an Exception if the value cannot be
    converted, or if 'dest_type' is not one of the supported types."""
    if dest_type == type(bool()):
        if string_value == "True" or string_value == "true":
            return True
        elif string_value == "False" or string_value == "false":
            return False
        else:
            raise Exception("Invalid configuration value {0}={1} (expected bool)".format(
                key, string_value))
    elif dest_type == type(int()):
        try:
            return int(string_value)
        except (TypeError, ValueError):
            raise Exception("Invalid configuration value {0}={1} (expected int)".format(
                key, string_value))
    elif dest_type == type(float()):
        try:
            return float(string_value)
        except (TypeError, ValueError):
            # BUG FIX: this message previously said 'expected int'.
            raise Exception("Invalid configuration value {0}={1} (expected float)".format(
                key, string_value))
    elif dest_type == type(str()):
        return string_value
    else:
        # BUG FIX: previously an unsupported dest_type fell off the end of
        # the function and silently returned None.
        raise Exception("Unsupported type {0} for configuration value {1}".format(
            dest_type, key))



# This class parses and stores a Descriptor-- expression
# like Append(Offset(input, -3), input) and so on.
# For the full range of possible expressions, see the comment at the
# top of src/nnet3/nnet-descriptor.h.
# Note: as an extension to the descriptor format used in the C++
# code, we can have e.g. input@-3 meaning Offset(input, -3);
# and if bare integer numbers appear where a descriptor was expected,
# they are interpreted as Offset(prev_layer, -3) where 'prev_layer'
# is the previous layer in the config file.
# Also, in any place a raw input/layer/output name can appear, we accept things
# like [-1] meaning the previous input/layer/output's name, or [-2] meaning the
# last-but-one input/layer/output, and so on.
class Descriptor:
    """Parses and stores a Descriptor -- an expression like
    Append(Offset(input, -3), input).  For the full range of possible
    expressions, see the comment at the top of src/nnet3/nnet-descriptor.h."""

    def __init__(self,
                 descriptor_string = None,
                 prev_names = None):
        # self.operator is a string that may be 'Offset', 'Append',
        # 'Sum', 'Failover', 'IfDefined', 'Switch', 'Round',
        # 'ReplaceIndex'; it also may be None, representing the base-case
        # (where it's just a layer name).
        #
        # self.items will be whatever items are inside the parentheses,
        # e.g. if this is Sum(foo bar), then items will be [d1, d2], where
        # d1 is a Descriptor for 'foo' and d2 is a Descriptor for 'bar'.
        # However, there are cases where elements of self.items are strings
        # or integers, for instance in an expression
        # 'ReplaceIndex(ivector, x, 0)', self.items would be [d, 'x', 0],
        # where d is a Descriptor for 'ivector'.  In the case where
        # self.operator is None (where this Descriptor represents just a
        # bare layer name), self.items contains the name of the input layer
        # as a string.
        self.operator = None
        self.items = None

        if descriptor_string != None:
            try:
                tokens = TokenizeDescriptor(descriptor_string, prev_names)
                pos = 0
                (d, pos) = ParseNewDescriptor(tokens, pos, prev_names)
                # note: 'pos' should point to the 'end of string' marker
                # that terminates 'tokens'.
                if pos != len(tokens) - 1:
                    raise Exception("Parsing Descriptor, saw junk at end: " +
                                    ' '.join(tokens[pos:-1]))
                # copy members from d.
                self.operator = d.operator
                self.items = d.items
            except Exception as e:
                traceback.print_tb(sys.exc_info()[2])
                raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format(
                    descriptor_string, repr(e)))

    # This is like the str() function, but it uses the layer_to_string function
    # (which is a function from strings to strings) to convert layer names (or
    # in general sub-layer names of the form 'foo.bar') to the component-node
    # (or, in general, descriptor) names that appear in the final config file.
    # This mechanism gives those designing layer types the freedom to name
    # their nodes as they want.
    def ConfigString(self, layer_to_string):
        if self.operator is None:
            assert len(self.items) == 1 and isinstance(self.items[0], str)
            return layer_to_string(self.items[0])
        else:
            assert isinstance(self.operator, str)
            return self.operator + '(' + ', '.join(
                [ item.ConfigString(layer_to_string) if isinstance(item, Descriptor) else str(item)
                  for item in self.items]) + ')'

    def str(self):
        """Return the normalized string form, e.g. 'Sum(foo, bar)'."""
        if self.operator is None:
            assert len(self.items) == 1 and isinstance(self.items[0], str)
            return self.items[0]
        else:
            assert isinstance(self.operator, str)
            return self.operator + '(' + ', '.join([str(item) for item in self.items]) + ')'

    def __str__(self):
        return self.str()

    # This function returns the dimension (i.e. the feature dimension) of the
    # descriptor.  It takes 'layer_to_dim' which is a function from
    # layer-names (including sub-layer names, like lstm1.memory_cell) to
    # dimensions, e.g. you might have layer_to_dim('ivector') = 100, or
    # layer_to_dim('affine1') = 1024.
    # note: layer_to_dim will raise an exception if a nonexistent layer or
    # sub-layer is requested.
    def Dim(self, layer_to_dim):
        if self.operator is None:
            # base-case: self.items = [ layer_name ] (or sub-layer name, like
            # 'lstm.memory_cell').
            return layer_to_dim(self.items[0])
        elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]:
            # these are all operators for which all args are descriptors
            # and must have the same dim.
            dim = self.items[0].Dim(layer_to_dim)
            for desc in self.items[1:]:
                next_dim = desc.Dim(layer_to_dim)
                if next_dim != dim:
                    raise Exception("In descriptor {0}, different fields have different "
                                    "dimensions: {1} != {2}".format(self.str(), dim, next_dim))
            return dim
        elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]:
            # for these operators, only the 1st arg is relevant.
            return self.items[0].Dim(layer_to_dim)
        elif self.operator == 'Append':
            return sum([ x.Dim(layer_to_dim) for x in self.items])
        else:
            raise Exception("Unknown operator {0}".format(self.operator))



# This just checks that seen_item == expected_item, and raises an
# exception if not.
def ExpectToken(expected_item, seen_item, what_parsing):
    if seen_item != expected_item:
        raise Exception("parsing {0}, expected '{1}' but got '{2}'".format(
            what_parsing, expected_item, seen_item))


# returns true if 'name' is valid as the name of a line (input, layer or output);
# this is the same as IsValidName() in the nnet3 code.
def IsValidLineName(name):
    # BUG FIX: the pattern previously had no '$' anchor, so any string with a
    # valid *prefix* (e.g. 'foo$bar') was accepted; the nnet3 IsValidName()
    # validates the whole string.
    return isinstance(name, str) and re.match(r'^[a-zA-Z_][-a-zA-Z_0-9.]*$', name) != None


# This function for parsing Descriptors takes an array of tokens as produced
# by TokenizeDescriptor.  It parses a descriptor
# starting from position pos >= 0 of the array 'tokens', and
# returns a new position in the array that reflects any tokens consumed while
# parsing the descriptor.
# It returns a pair (d, pos) where d is the newly parsed Descriptor,
# and 'pos' is the new position after consuming the relevant input.
# 'prev_names' is so that we can find the most recent layer name for
# expressions like Append(-3, 0, 3) which is shorthand for the most recent
# layer spliced at those time offsets.
def ParseNewDescriptor(tokens, pos, prev_names):
    """Parse one Descriptor starting at position pos >= 0 of 'tokens' (as
    produced by TokenizeDescriptor).  Returns a pair (d, pos) where d is the
    newly parsed Descriptor and 'pos' is the position after consuming the
    relevant input.  'prev_names' is so that we can find the most recent
    layer name for expressions like Append(-3, 0, 3), which is shorthand for
    the most recent layer spliced at those time offsets."""
    size = len(tokens)
    first_token = tokens[pos]
    pos += 1
    d = Descriptor()

    # when reading this function, be careful to note the indent level,
    # there is an if-statement within an if-statement.
    if first_token in [ 'Offset', 'Round', 'ReplaceIndex', 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]:
        ExpectToken('(', tokens[pos], first_token + '()')
        pos += 1
        d.operator = first_token
        # the 1st argument of all these operators is a Descriptor.
        (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names)
        d.items = [desc]

        if first_token == 'Offset':
            ExpectToken(',', tokens[pos], 'Offset()')
            pos += 1
            try:
                t_offset = int(tokens[pos])
                pos += 1
                d.items.append(t_offset)
            except:
                raise Exception("Parsing Offset(), expected integer, got " + tokens[pos])
            # Offset() may have either one or two integer arguments.
            if tokens[pos] == ')':
                return (d, pos + 1)
            elif tokens[pos] != ',':
                raise Exception("Parsing Offset(), expected ')' or ',', got " + tokens[pos])
            pos += 1
            try:
                x_offset = int(tokens[pos])
                pos += 1
                d.items.append(x_offset)
            except:
                raise Exception("Parsing Offset(), expected integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'Offset()')
            pos += 1
        elif first_token in [ 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]:
            while True:
                if tokens[pos] == ')':
                    # check num-items is correct for some special cases.
                    if first_token == 'Failover' and len(d.items) != 2:
                        raise Exception("Parsing Failover(), expected 2 items but got {0}".format(len(d.items)))
                    if first_token == 'IfDefined' and len(d.items) != 1:
                        raise Exception("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items)))
                    pos += 1
                    break
                elif tokens[pos] == ',':
                    pos += 1  # consume the comma.
                else:
                    # BUG FIX: the message previously always said 'Append()',
                    # even when parsing Sum(), Switch(), etc.
                    raise Exception("Parsing {0}(), expected ')' or ',', got ".format(
                        first_token) + tokens[pos])

                (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names)
                d.items.append(desc)
        elif first_token == 'Round':
            ExpectToken(',', tokens[pos], 'Round()')
            pos += 1
            try:
                t_modulus = int(tokens[pos])
                assert t_modulus > 0
                pos += 1
                d.items.append(t_modulus)
            except:
                # BUG FIX: this message previously said 'Parsing Offset()'.
                raise Exception("Parsing Round(), expected positive integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'Round()')
            pos += 1
        elif first_token == 'ReplaceIndex':
            ExpectToken(',', tokens[pos], 'ReplaceIndex()')
            pos += 1
            if tokens[pos] in [ 'x', 't' ]:
                d.items.append(tokens[pos])
                pos += 1
            else:
                raise Exception("Parsing ReplaceIndex(), expected 'x' or 't', got " +
                                tokens[pos])
            ExpectToken(',', tokens[pos], 'ReplaceIndex()')
            pos += 1
            try:
                new_value = int(tokens[pos])
                pos += 1
                d.items.append(new_value)
            except:
                # BUG FIX: this message previously said 'Parsing Offset()'.
                raise Exception("Parsing ReplaceIndex(), expected integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'ReplaceIndex()')
            pos += 1
        else:
            raise Exception("code error")
    elif first_token in [ 'end of string', '(', ')', ',', '@' ]:
        raise Exception("Expected descriptor, got " + first_token)
    elif IsValidLineName(first_token) or first_token == '[':
        # This section parses a raw input/layer/output name, e.g. "affine2"
        # (which must start with an alphabetic character or underscore),
        # optionally followed by an offset like '@-3'.

        d.operator = None
        d.items = [first_token]

        # If the layer-name is followed by '@', then
        # we're parsing something like 'affine1@-3' which
        # is syntactic sugar for 'Offset(affine1, -3)'.
        if tokens[pos] == '@':
            pos += 1
            try:
                offset_t = int(tokens[pos])
                pos += 1
            except:
                raise Exception("Parse error parsing {0}@{1}".format(
                    first_token, tokens[pos]))
            if offset_t != 0:
                inner_d = d
                d = Descriptor()
                # e.g. foo@3 is equivalent to 'Offset(foo, 3)'.
                d.operator = 'Offset'
                d.items = [ inner_d, offset_t ]
    else:
        # the last possible case is that 'first_token' is just an integer i,
        # which can appear in things like Append(-3, 0, 3).
        # See if the token is an integer.
        # In this case, it's interpreted as the name of previous layer
        # (with that time offset applied).
        try:
            offset_t = int(first_token)
        except:
            raise Exception("Parsing descriptor, expected descriptor but got " +
                            first_token)
        assert isinstance(prev_names, list)
        if len(prev_names) < 1:
            raise Exception("Parsing descriptor, could not interpret '{0}' because "
                            "there is no previous layer".format(first_token))
        d.operator = None
        # the layer name is the name of the most recent layer.
        d.items = [prev_names[-1]]
        if offset_t != 0:
            inner_d = d
            d = Descriptor()
            d.operator = 'Offset'
            d.items = [ inner_d, offset_t ]
    return (d, pos)


# This function takes a string 'descriptor_string' which might
# look like 'Append([-1], [-2], input)', and a list of previous layer
# names like prev_names = ['foo', 'bar', 'baz'], and replaces
# the integers in brackets with the previous layers.  -1 means
# the most recent previous layer ('baz' in this case), -2
# means the last layer but one ('bar' in this case), and so on.
# It will throw an exception if the number is out of range.
# If there are no such expressions in the string, it's OK if
# prev_names == None (this is useful for testing).
def ReplaceBracketExpressionsInDescriptor(descriptor_string,
                                          prev_names = None):
    """Replace bracketed indexes like '[-1]' in 'descriptor_string' with the
    corresponding entries of 'prev_names' ([-1] is the most recent previous
    layer, [-2] the last layer but one, and so on).  Raises an exception if
    an index is out of range or a bracket is unmatched."""
    fields = re.split(r'(\[|\])\s*', descriptor_string)
    out_fields = []
    i = 0
    while i < len(fields):
        f = fields[i]
        i += 1
        if f == ']':
            raise Exception("Unmatched ']' in descriptor")
        elif f == '[':
            if i + 2 >= len(fields):
                raise Exception("Error tokenizing string '{0}': '[' found too close "
                                "to the end of the descriptor.".format(descriptor_string))
            assert isinstance(prev_names, list)
            try:
                offset = int(fields[i])
                assert offset < 0 and -offset <= len(prev_names)
                i += 2  # consume the int and the ']'.
            except:
                raise Exception("Error tokenizing string '{0}': expression [{1}] has an "
                                "invalid or out of range offset.".format(descriptor_string, fields[i]))
            this_field = prev_names[offset]
            out_fields.append(this_field)
        else:
            out_fields.append(f)
    return ''.join(out_fields)



# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors.
# Note: for convenience in parsing, we add the token 'end of string' to this
# list.
# The argument 'prev_names' (for the names of previous layers and input and
# output nodes) is needed to process expressions like [-1] meaning the most
# recent layer, or [-2] meaning the last layer but one.
# The default None for prev_names is only supplied for testing purposes.
def TokenizeDescriptor(descriptor_string,
                       prev_names = None):
    # split on '(', ')', ',', '@', and space.  Note: the parenthesis () in the
    # regexp causes it to output the stuff inside the () as if it were a field,
    # which is how the call to re.split() keeps characters like '(' and ')' as
    # tokens.
    fields = re.split(r'(\(|\)|@|,|\s)\s*',
                      ReplaceBracketExpressionsInDescriptor(descriptor_string,
                                                           prev_names))
    ans = []
    for f in fields:
        # don't include fields that are space, or are empty.
        if re.match(r'^\s*$', f) is None:
            ans.append(f)

    ans.append('end of string')
    return ans


# This function parses a line in a config file, something like
#   affine-layer name=affine1 input=Append(-3, 0, 3)
# and returns a pair,
# (first_token, fields), as (string, dict) e.g. in this case
# ('affine-layer', {'name':'affine1', 'input':'Append(-3, 0, 3)"
# Note: spaces are allowed in the field values but = signs are
# disallowed, which is why it's possible to parse them.
# This function also removes comments (anything after '#').
# As a special case, this function will return None if the line
# is empty after removing spaces.
def ParseConfigLine(orig_config_line):
    # Remove comments.
    # note: splitting on '#' will always give at least one field...  python
    # treats splitting on space as a special case that may give zero fields.
    config_line = orig_config_line.split('#')[0]
    # Now split on space; later we may splice things back together.
    fields = config_line.split()
    if len(fields) == 0:
        return None   # Line was only whitespace after removing comments.
    first_token = fields[0]
    # if first_token does not look like 'foo-bar' or 'foo-bar2', then die.
    if re.match('^[a-z][-a-z0-9]+$', first_token) is None:
        raise Exception("Error parsing config line (first field doesn't look right): {0}".format(
            orig_config_line))
    # get rid of the first field which we put in 'first_token'.
    fields = fields[1:]

    rest_of_line = ' '.join(fields)

    # suppose rest_of_line is: 'input=Append(foo, bar) foo=bar'
    # then after the below we'll get
    # fields = ['', 'input', 'Append(foo, bar)', 'foo', 'bar']
    fields = re.split(r'\s*([-a-zA-Z0-9_]*)=', rest_of_line)
    if not (fields[0] == '' and len(fields) % 2 == 1):
        raise Exception("Could not parse config line: " + orig_config_line)
    fields = fields[1:]
    # BUG FIX: use integer division; under python 3, '/' yields a float and
    # the call to range() below would raise TypeError.
    num_variables = len(fields) // 2
    ans_dict = dict()
    for i in range(num_variables):
        var_name = fields[i * 2]
        var_value = fields[i * 2 + 1]
        if re.match(r'[a-zA-Z_]', var_name) is None:
            raise Exception("Expected variable name '{0}' to start with alphabetic character or _, "
                            "in config line {1}".format(var_name, orig_config_line))
        if var_name in ans_dict:
            raise Exception("Config line has multiply defined variable {0}: {1}".format(
                var_name, orig_config_line))
        ans_dict[var_name] = var_value
    return (first_token, ans_dict)


# Reads a config file and returns a list of objects, where each object
# represents one line of the file.
+def ReadConfigFile(filename): + try: + f = open(filename, "r") + except Exception as e: + raise Exception("Error reading config file {0}: {1}".format( + filename, repr(e))) + ans = [] + prev_names = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(line) + if x is None: + continue # blank line + (first_token, key_to_value) = x + layer_object = ConfigLineToObject(first_token, key_to_value, prev_names) + ans.append(layer_object) + prev_names.append(layer_object.Name()) + + +def TestLibrary(): + TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' + assert TokenizeTest("hi") == ['hi'] + assert TokenizeTest("hi there") == ['hi', 'there'] + assert TokenizeTest("hi,there") == ['hi', ',', 'there'] + assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] + assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] + assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] + assert TokenizeDescriptor("[-2].special@2", ['foo', 'bar'])[:-1] == ['foo.special', '@', '2' ] + + assert Descriptor('foo').str() == 'foo' + assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' + assert Descriptor('Sum(Offset(foo,1),Offset(foo,0))').str() == 'Sum(Offset(foo, 1), Offset(foo, 0))' + for x in [ 'Append(foo, Sum(bar, Offset(baz, 1)))', 'Failover(foo, Offset(bar, -1))', + 'IfDefined(Round(baz, 3))', 'Switch(foo1, Offset(foo2, 2), Offset(foo3, 3))', + 'IfDefined(ReplaceIndex(ivector, t, 0))', 'ReplaceIndex(foo, x, 0)' ]: + if not Descriptor(x).str() == x: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), x)) + + prev_names = ['last_but_one_layer', 'prev_layer'] + for x, y in [ ('Sum(foo,bar)', 'Sum(foo, bar)'), + ('Sum(foo1,bar-3_4)', 'Sum(foo1, bar-3_4)'), + ('Append(input@-3, input@0, input@3)', + 'Append(Offset(input, -3), input, Offset(input, 3))'), + ('Append(-3,0,3)', + 'Append(Offset(prev_layer, -3), prev_layer, Offset(prev_layer, 3))'), + ('[-1]', 'prev_layer'), + 
('[-2]', 'last_but_one_layer'), + ('[-2]@3', + 'Offset(last_but_one_layer, 3)') ]: + if not Descriptor(x, prev_names).str() == y: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + + + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) + + print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) + print(ParseConfigLine('affine-layer')) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py new file mode 100755 index 00000000000..bd841aae1f2 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import os +import argparse +import shlex +import sys +import warnings +import copy +import imp +import ast +from collections import defaultdict + +sys.path.insert(0, 'steps/nnet3/libs/') +# the following is in case we weren't running this from the normal directory. 
sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/')

import xconfig_utils
import xconfig_layers


def GetArgs():
    """Parse the command-line arguments; returns the checked args object."""
    # we add compulsary arguments as named arguments for readability
    parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files '
                                     'for neural net creation and training',
                                     epilog='Search egs/*/*/local/nnet3/*sh for examples')
    parser.add_argument('xconfig_file',
                        help='Filename of input xconfig file')
    parser.add_argument('config_dir',
                        help='Directory to write config files and variables')

    print(' '.join(sys.argv))

    args = parser.parse_args()
    args = CheckArgs(args)

    return args


def CheckArgs(args):
    """Validate the parsed arguments; creates config_dir if missing."""
    if not os.path.exists(args.config_dir):
        os.makedirs(args.config_dir)
    return args


# NOTE(review): kept as a reference for a 'vars' file that other scripts
# (e.g. steps/nnet3/get_egs.sh) will eventually need; not yet implemented.
# # write the files used by other scripts like steps/nnet3/get_egs.sh
# f = open(config_dir + 'vars', 'w')
# print('model_left_context=' + str(left_context), file=f)
# print('model_right_context=' + str(right_context), file=f)
# print('num_hidden_layers=' + str(num_hidden_layers), file=f)
# print('num_targets=' + str(num_targets), file=f)
# print('add_lda=' + ('true' if add_lda else 'false'), file=f)
# print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f)
# print('objective_type=' + objective_type, file=f)
# f.close()



def BackUpXconfigFile(xconfig_file, config_dir):
    """Write a copy of the xconfig file to <config_dir>/xconfig, just to
    have a record of the original input."""
    try:
        xconfig_file_out = open(config_dir + '/xconfig', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig for output'.format(
            sys.argv[0], config_dir))
    try:
        xconfig_file_in = open(xconfig_file)
    except:
        # BUG FIX: this message previously formatted config_dir instead of
        # the input filename that failed to open.
        sys.exit('{0}: error opening file {1} for input'.format(sys.argv[0], xconfig_file))

    print("# This file was created by the command:\n"
          "# {0}\n"
          "# It is a copy of the source from which the config files in "
          "# this directory were generated.\n".format(' '.join(sys.argv)),
          file=xconfig_file_out)

    while True:
        line = xconfig_file_in.readline()
        if line == '':
            break
        print(line.strip(), file=xconfig_file_out)
    xconfig_file_out.close()
    xconfig_file_in.close()


# This functions writes config_dir/xconfig.expanded.1 and
# config_dir/xconfig.expanded.2, showing some of the internal stages of
# processing the xconfig file before turning it into config files.
def WriteExpandedXconfigFiles(config_dir, all_layers):
    try:
        xconfig_file_out = open(config_dir + '/xconfig.expanded.1', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig.expanded.1 for output'.format(
            sys.argv[0], config_dir))

    print('# This file was created by the command:\n'
          '# ' + ' '.join(sys.argv) + '\n'
          '#It contains the same content as ./xconfig but it was parsed and\n'
          '#default config values were set.\n'
          '# See also ./xconfig.expanded.2\n', file=xconfig_file_out)

    for layer in all_layers:
        print(str(layer), file=xconfig_file_out)
    xconfig_file_out.close()

    try:
        xconfig_file_out = open(config_dir + '/xconfig.expanded.2', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig.expanded.2 for output'.format(
            sys.argv[0], config_dir))

    print('# This file was created by the command:\n'
          '# ' + ' '.join(sys.argv) + '\n'
          '# It contains the same content as ./xconfig but it was parsed,\n'
          '# default config values were set, and Descriptors (input=xxx) were normalized.\n'
          '# See also ./xconfig.expanded.1\n\n',
          file=xconfig_file_out)

    for layer in all_layers:
        layer.NormalizeDescriptors()
        print(str(layer), file=xconfig_file_out)
    xconfig_file_out.close()




# This function returns a map from config-file basename
# e.g. 'init', 'ref', 'layer1' to a documentation string that goes
# at the top of the file.
def GetConfigHeaders():
    # resulting dict will default to the empty string for any config files
    # not explicitly listed here.
    ans = defaultdict(str)
    ans['init'] = ('# This file was created by the command:\n'
                   '# ' + ' '.join(sys.argv) + '\n'
                   '# It contains the input of the network and is used in\n'
                   '# accumulating stats for an LDA-like transform of the\n'
                   '# input features.\n')
    ans['ref'] = ('# This file was created by the command:\n'
                  '# ' + ' '.join(sys.argv) + '\n'
                  '# It contains the entire neural network, but with those\n'
                  '# components that would normally require fixed vectors/matrices\n'
                  '# read from disk, replaced with random initialization\n'
                  '# (this applies to the LDA-like transform and the\n'
                  '# presoftmax-prior-scale, if applicable). This file\n'
                  '# is used only to work out the left-context and right-context\n'
                  '# of the network.\n')
    ans['all'] = ('# This file was created by the command:\n'
                  '# ' + ' '.join(sys.argv) + '\n'
                  '# It contains the entire neural network. It might not be used\n'
                  '# in the current scripts; it\'s provided for forward compatibility\n'
                  '# to possible future changes.\n')

    # Note: currently we just copy all lines that were going to go to 'all', into
    # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py
    ans['layer1'] = ('# This file was created by the command:\n'
                     '# ' + ' '.join(sys.argv) + '\n'
                     '# It contains the configuration of the entire neural network.\n'
                     '# The contents are the same\n'
                     '# as \'all.config\'. The reason this file is named this way (and\n'
                     '# that the config file `num_hidden_layers` contains 1, even though\n'
                     '# this file may really contain more than 1 hidden layer), is\n'
                     '# historical... we used to create networks by adding hidden layers\n'
                     '# one by one (discriminative pretraining), but more recently we\n'
                     '# have found that it\'s better to add them all at once. This file\n'
                     '# exists to enable the older training scripts to work. Note:\n'
                     '# it contains the inputs of the neural network even though it doesn\'t\n'
                     '# have to (since they are included in \'init.config\'). This will\n'
                     '# give us the flexibility to change the scripts in future.\n')
    return ans




# This is where most of the work of this program happens.
def WriteConfigFiles(config_dir, all_layers):
    # config_basename_to_lines is map from the basename of the
    # config, as a string (i.e. 'ref', 'all', 'init') to a list of
    # strings representing lines to put in the config file.
    config_basename_to_lines = defaultdict(list)

    config_basename_to_header = GetConfigHeaders()

    for layer in all_layers:
        try:
            pairs = layer.GetFullConfig()
            for config_basename, line in pairs:
                config_basename_to_lines[config_basename].append(line)
        except Exception as e:
            print('{0}: error producing config lines from xconfig '
                  'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer),
                                                        repr(e)), file=sys.stderr)
            # bare 'raise' preserves the original traceback (raise(e) did not).
            raise

    # currently we don't expect any of the GetFullConfig functions to output to
    # config-basename 'layer1'... currently we just copy this from
    # config-basename 'all', for back-compatibility to older scripts.
    assert not 'layer1' in config_basename_to_lines
    config_basename_to_lines['layer1'] = config_basename_to_lines['all']

    for basename, lines in config_basename_to_lines.items():
        header = config_basename_to_header[basename]
        filename = '{0}/{1}.config'.format(config_dir, basename)
        try:
            f = open(filename, 'w')
            print(header, file=f)
            for line in lines:
                print(line, file=f)
            f.close()
        except Exception as e:
            print('{0}: error writing to config file {1}: error is {2}'.format(
                sys.argv[0], filename, repr(e)), file=sys.stderr)
            # bare 'raise' preserves the original traceback.
            raise




def Main():
    """Top-level driver: parse args, back up the xconfig, then write the
    expanded xconfig files and the generated .config files."""
    args = GetArgs()
    BackUpXconfigFile(args.xconfig_file, args.config_dir)
    all_layers = xconfig_layers.ReadXconfigFile(args.xconfig_file)
    WriteExpandedXconfigFiles(args.config_dir, all_layers)
    WriteConfigFiles(args.config_dir, all_layers)



if __name__ == '__main__':
    Main()


# test:
# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo
# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo

# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo

# mkdir -p foo; (echo 'input dim=100 name=ivector'; echo 'input dim=40 name=input'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo