# egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py
# Layer-type-specific code for the xconfig -> config conversion; generic
# lower-level helpers live in xconfig_utils.py.

from __future__ import print_function
import subprocess
import logging
import math
import re
import sys
import traceback
import time
import argparse
import xconfig_utils


# A base-class for classes representing layers of xconfig files.
# This mainly just sets self.layer_type, self.name and self.config, and
# parses any config values that are Descriptors (storing the parsed forms
# in self.descriptors).
class XconfigLayerBase(object):
    # Constructor.
    # 'first_token' is the first token on the xconfig line, e.g. 'affine-layer'.
    # 'key_to_value' is a dict like:
    # { 'name':'affine1', 'input':'Append(0, 1, 2, ReplaceIndex(ivector, t, 0))', 'dim':'1024' }.
    # The only required and 'special' values that are dealt with directly at
    # this level are 'name' and 'input'.  The rest are put in self.config and
    # are dealt with by the child classes' init functions.
    # 'all_layers' is an array of objects inheriting XconfigLayerBase for all
    # previously parsed layers.
    def __init__(self, first_token, key_to_value, all_layers):
        self.layer_type = first_token
        if not 'name' in key_to_value:
            raise RuntimeError("Expected 'name' to be specified.")
        self.name = key_to_value['name']
        if not xconfig_utils.IsValidLineName(self.name):
            raise RuntimeError("Invalid value: name={0}".format(key_to_value['name']))

        # the following, which should be overridden in the child class, sets
        # default config parameters in self.config.
        self.SetDefaultConfigs()
        # The following is not to be reimplemented in child classes;
        # it sets the config values to those specified by the user, and
        # parses any Descriptors.
        self.SetConfigs(key_to_value, all_layers)
        # the following, which should be overridden in the child class, checks
        # that the config parameters that have been set are reasonable.
        self.CheckConfigs()


    # Copies user-supplied key/value pairs into self.config (type-converting
    # them to match the defaults' types), then parses all Descriptor-valued
    # configs.  We broke this code out of __init__ for clarity.
    def SetConfigs(self, key_to_value, all_layers):
        # the child-class constructor will deal with the configuration values
        # in a more specific way.
        for key,value in key_to_value.items():
            if key != 'name':
                if not key in self.config:
                    raise RuntimeError("Configuration value {0}={1} was not expected in "
                                       "layer of type {2}".format(key, value, self.layer_type))
                self.config[key] = xconfig_utils.ConvertValueToType(key, type(self.config[key]), value)


        self.descriptors = dict()
        # NOTE(review): self.descriptor_dims appears to be unused (dims are
        # stored inside the self.descriptors tuples instead) -- confirm.
        self.descriptor_dims = dict()
        # Parse Descriptors and get their dims and their 'final' string form.
        # Put them as 4-tuples (descriptor, dim, normalized-string, final-string)
        # in self.descriptors[key]
        for key in self.GetDescriptorConfigs():
            if not key in self.config:
                raise RuntimeError("{0}: object of type {1} needs to override "
                                   "GetDescriptorConfigs()".format(sys.argv[0],
                                                                   str(type(self))))
            descriptor_string = self.config[key]  # input string.
            assert isinstance(descriptor_string, str)
            desc = self.ConvertToDescriptor(descriptor_string, all_layers)
            desc_dim = self.GetDimForDescriptor(desc, all_layers)
            desc_norm_str = desc.str()
            # desc_output_str contains the "final" component names, those that
            # appear in the actual config file (i.e. not names like
            # 'layer.qualifier'); that's how it differs from desc_norm_str.
            # Note: it's possible that the two strings might be the same in
            # many, even most, cases-- it depends whether OutputName(self, qualifier)
            # returns self.Name() + '.' + qualifier when qualifier is not None.
            # That's up to the designer of the layer type.
            desc_output_str = self.GetStringForDescriptor(desc, all_layers)
            self.descriptors[key] = (desc, desc_dim, desc_norm_str, desc_output_str)
            # the following helps to check the code by parsing the normalized
            # string again and making sure it round-trips unchanged.
            desc2 = self.ConvertToDescriptor(desc_norm_str, all_layers)
            desc_norm_str2 = desc2.str()
            # if the following ever fails we'll have to do some debugging.
            if desc_norm_str != desc_norm_str2:
                raise RuntimeError("Likely code error: '{0}' != '{1}'".format(
                    desc_norm_str, desc_norm_str2))

    # This function converts 'this' to a string which could be printed to an
    # xconfig file; in xconfig_to_configs.py we actually expand all the lines to
    # strings and write it as xconfig.expanded as a reference (so users can
    # see any defaults).
    def str(self):
        ans = '{0} name={1}'.format(self.layer_type, self.name)
        ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key])
                                for key in sorted(self.config.keys())])
        return ans

    def __str__(self):
        return self.str()


    # This function converts any config variables in self.config which
    # correspond to Descriptors, into a 'normalized form' derived from parsing
    # them as Descriptors, replacing things like [-1] with the actual layer
    # names, and regenerating them as strings.  We stored this when the
    # object was initialized, in self.descriptors; this function just copies
    # them back to the config.
    def NormalizeDescriptors(self):
        # note: the loop variable shadows the builtin 'tuple'; it is a 4-tuple
        # (descriptor, dim, normalized-string, final-string).
        for key,tuple in self.descriptors.items():
            self.config[key] = tuple[2]  # desc_norm_str

    # This function, which is a convenience function intended to be called from
    # child classes, converts a string representing a descriptor
    # ('descriptor_string') into an object of type Descriptor, and returns it.
    # It needs 'self' and 'all_layers' (where 'all_layers' is a list of objects
    # of type XconfigLayerBase) so that it can work out a list of the names of
    # other layers, and get dimensions from them.
    def ConvertToDescriptor(self, descriptor_string, all_layers):
        prev_names = xconfig_utils.GetPrevNames(all_layers, self)
        tokens = xconfig_utils.TokenizeDescriptor(descriptor_string, prev_names)
        pos = 0
        (descriptor, pos) = xconfig_utils.ParseNewDescriptor(tokens, pos, prev_names)
        # note: 'pos' should point to the 'end of string' marker
        # that terminates 'tokens'.
        if pos != len(tokens) - 1:
            raise RuntimeError("Parsing Descriptor, saw junk at end: " +
                               ' '.join(tokens[pos:-1]))
        return descriptor

    # Returns the dimension of a Descriptor object.
    # This is a convenience function used in SetConfigs.
    def GetDimForDescriptor(self, descriptor, all_layers):
        layer_to_dim_func = lambda name: xconfig_utils.GetDimFromLayerName(all_layers, self, name)
        return descriptor.Dim(layer_to_dim_func)

    # Returns the 'final' string form of a Descriptor object, as could be used
    # in config files.
    # This is a convenience function provided for use in child classes.
    def GetStringForDescriptor(self, descriptor, all_layers):
        layer_to_string_func = lambda name: xconfig_utils.GetStringFromLayerName(all_layers, self, name)
        return descriptor.ConfigString(layer_to_string_func)

    # Name() returns the name of this layer, e.g. 'affine1'.  It does not
    # necessarily correspond to a component name.
    def Name(self):
        return self.name

    ###### Functions that might be overridden by the child class: #####

    # child classes should override this; it must set self.config to a dict
    # whose keys are all the allowed config names and whose values have the
    # correct types (types are used for conversion of user-supplied values).
    def SetDefaultConfigs(self):
        raise RuntimeError("Child classes must override SetDefaultConfigs().")

    # child classes should override this to validate self.config; the default
    # does no checking.
    def CheckConfigs(self):
        pass

    # This function, which may be (but usually will not have to be) overridden
    # by child classes, returns a list of keys/names of config variables that
    # will be interpreted as Descriptors.  It is used in the function
    # 'NormalizeDescriptors()'.  This implementation will work for layer types
    # whose only Descriptor-valued config is 'input'.
    #
    # If a child class adds more config variables that are interpreted as
    # descriptors (e.g. to read auxiliary inputs), or does not have an input
    # (e.g. the XconfigInputLayer), it should override this function's
    # implementation to something like: `return ['input', 'input2']`
    def GetDescriptorConfigs(self):
        return [ 'input' ]

    # Returns a list of all qualifiers (meaning auxiliary outputs) that this
    # layer supports.  These are either 'None' for the regular output, or a
    # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that
    # the layer might provide.  Most layer types will not need to override this.
    def Qualifiers(self):
        return [ None ]

    # Called with qualifier == None, this returns the component-node name of the
    # principal output of the layer (or if you prefer, the text form of a
    # descriptor that gives you such an output; such as Append(some_node,
    # some_other_node)).
    # The 'qualifier' argument is a text value that is designed for extensions
    # to layers that have additional auxiliary outputs.  For example, to implement
    # a highway LSTM you need the memory-cell of a layer, so you might allow
    # qualifier='memory_cell' for such a layer type, and it would return the
    # component node or a suitable Descriptor: something like 'lstm3.c_t'
    def OutputName(self, qualifier = None):
        raise RuntimeError("Child classes must override OutputName()")

    # The dimension that this layer outputs.  The 'qualifier' parameter is for
    # layer types which support auxiliary outputs.
    def OutputDim(self, qualifier = None):
        raise RuntimeError("Child classes must override OutputDim()")

    # This function returns lines destined for the 'full' config format, as
    # would be read by the C++ programs.
    # Since the program xconfig_to_configs.py writes several config files, this
    # function returns a list of pairs of the form (config_file_basename, line),
    # e.g. something like
    # [ ('init', 'input-node name=input dim=40'),
    #   ('ref', 'input-node name=input dim=40') ]
    # which would be written to config_dir/init.config and config_dir/ref.config.
    def GetFullConfig(self):
        raise RuntimeError("Child classes must override GetFullConfig()")


# This class is for lines like
#  'input name=input dim=40'
# or
#  'input name=ivector dim=100'
# in the config file.
class XconfigInputLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'input'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)


    def SetDefaultConfigs(self):
        # dim=-1 means "unset"; CheckConfigs insists the user supplies it.
        self.config = { 'dim':-1 }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("Dimension of input-layer '{0}' is not set".format(self.name))

    def GetDescriptorConfigs(self):
        return []  # there is no 'input' field in self.config.

    def OutputName(self, qualifier = None):
        assert qualifier is None
        return self.name

    def OutputDim(self, qualifier = None):
        assert qualifier is None
        return self.config['dim']

    def GetFullConfig(self):
        # the input layers need to be printed in 'init.config' (which
        # initializes the neural network prior to the LDA), in 'ref.config',
        # which is a version of the config file used for getting left and right
        # context (it doesn't read anything for the LDA-like transform and/or
        # presoftmax-prior-scale components).
        # In 'all.config' we write everything; this is just for reference,
        # and also for cases where we don't use the LDA-like transform.
        ans = []
        for config_name in [ 'init', 'ref', 'all' ]:
            ans.append( (config_name,
                         'input-node name={0} dim={1}'.format(self.name,
                                                              self.config['dim'])))
        return ans



# This class is for lines like
#  'output name=output input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
# This is for outputs that are not really output "layers" (there is no affine
# transform or nonlinearity), they just directly map to an output-node in nnet3.
class XconfigTrivialOutputLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'output'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]' }

    def CheckConfigs(self):
        pass  # nothing to check; descriptor-parsing can't happen in this function.

    def OutputName(self, qualifier = None):
        assert qualifier is None
        return self.name

    def OutputDim(self, qualifier = None):
        assert qualifier is None
        # note: each value of self.descriptors is (descriptor, dim, normalized-string, output-string).
        return self.descriptors['input'][1]

    def GetFullConfig(self):
        # ref.config is used only for getting left/right context; all.config
        # holds the actual network definition.  This node has no trainable
        # parameters, so it does not appear in init.config.
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'output-string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_str = self.descriptors['input'][3]

        for config_name in [ 'ref', 'all' ]:
            ans.append( (config_name,
                         'output-node name={0} input={1}'.format(
                             self.name, descriptor_final_str)))
        return ans


# This class is for lines like
#  'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
# By default this includes a log-softmax component.  The parameters are
# initialized to zero, as this is best for output layers.
# Parameters of the class, and their defaults:
#   input='[-1]'             [Descriptor giving the input of the layer.]
#   dim=-1                   [Output dimension of layer, will normally equal the number of pdfs.]
#   include-log-softmax=true [setting it to false will omit the log-softmax component-
#                             useful for chain models.]
#   objective-type=linear    [the only other choice currently is 'quadratic', for
#                             use in regression problems]
#   learning-rate-factor=1.0 [Learning rate factor for the final affine component;
#                             multiplies the standard learning rate.  Normally you'll
#                             leave this as-is, but for xent regularization output
#                             layers for chain models you'll want to set
#                             learning-rate-factor=(0.5/xent_regularize), normally
#                             learning-rate-factor=5.0 since xent_regularize is
#                             normally 0.1.]
#   presoftmax-scale-file='' [If set, a filename for a vector that will be used to
#                             scale the output of the affine component before the
#                             log-softmax (if include-log-softmax=true), or before
#                             the output (if not).  This is helpful to avoid
#                             instability in training due to some classes having
#                             much more data than others.  The way we normally
#                             create this vector is to take the priors of the
#                             classes to the power -0.25 and rescale them so the
#                             average is 1.0.  This factor -0.25 is referred to
#                             as presoftmax_prior_scale_power in scripts.]
# (In the scripts, presoftmax-scale-file would normally be set to
# config_dir/presoftmax_prior_scale.vec.)
class XconfigOutputLayer(XconfigLayerBase):
    # This class is for lines like
    #  'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))'
    # i.e. a trainable output layer: an affine component (initialized to zero,
    # which is best for output layers), optionally a fixed scale read from
    # presoftmax-scale-file, optionally a log-softmax, and the output-node.
    # See the comment block preceding this class for the config parameters.
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'output-layer'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        # [fix: the original dict listed 'include-log-softmax' twice.]
        self.config = { 'input':'[-1]', 'dim':-1, 'include-log-softmax':True,
                        'objective-type':'linear', 'learning-rate-factor':1.0,
                        'presoftmax-scale-file':'' }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("In output-layer, dim has invalid value {0}".format(self.config['dim']))
        # [fix: the second test used the non-existent key 'objective_type'
        # (underscore), which would have raised KeyError for valid configs.]
        if self.config['objective-type'] != 'linear' and self.config['objective-type'] != 'quadratic':
            raise RuntimeError("In output-layer, objective-type has invalid value {0}".format(
                self.config['objective-type']))
        if self.config['learning-rate-factor'] <= 0.0:
            raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format(
                self.config['learning-rate-factor']))


    # you cannot access the output of this layer from other layers... see
    # comment in OutputName for the reason why.
    def Qualifiers(self):
        return []

    def OutputName(self, qualifier = None):
        # Note: nodes of type output-node in nnet3 may not be accessed in Descriptors,
        # so calling this with qualifier=None doesn't make sense.  But it might make
        # sense to make the output of the softmax layer and/or the output of the
        # affine layer available as inputs to other layers, in some circumstances.
        # we'll implement that when it's needed.
        raise RuntimeError("Outputs of output-layer may not be used by other layers")

    def OutputDim(self, qualifier = None):
        # see comment in OutputName().
        raise RuntimeError("Outputs of output-layer may not be used by other layers")

    def GetFullConfig(self):
        """Returns a list of (config-file-basename, line) pairs defining this
        layer: the affine component, the optional fixed-scale and log-softmax
        components, and the output-node."""
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        learning_rate_factor = self.config['learning-rate-factor']
        include_log_softmax = self.config['include-log-softmax']
        presoftmax_scale_file = self.config['presoftmax-scale-file']
        # TODO(review): self.config['objective-type'] is validated in
        # CheckConfigs but never written to the output-node here; confirm
        # whether an 'objective=...' option should be emitted.

        # note: ref.config is used only for getting the left-context and right-context
        # of the network; all.config is where we put the actual network definition.
        for config_name in [ 'ref', 'all' ]:
            # First the affine node.  param-stddev=0 bias-stddev=0 gives the
            # zero initialization that is best for output layers.
            line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} '
                    'output-dim={2} param-stddev=0 bias-stddev=0 '.format(
                        self.name, input_dim, output_dim) +
                    ('learning-rate-factor={0} '.format(learning_rate_factor)
                     if learning_rate_factor != 1.0 else ''))
            ans.append((config_name, line))
            line = ('component-node name={0}.affine component={0}.affine input={1}'.format(
                self.name, descriptor_final_string))
            ans.append((config_name, line))
            # cur_node tracks the most recently added node, which feeds the
            # next component-node and, finally, the output-node.
            cur_node = '{0}.affine'.format(self.name)
            if presoftmax_scale_file != '' and config_name == 'all':
                # don't use the presoftmax-scale in 'ref.config' since that file won't
                # exist at the time we evaluate it.  (ref.config is used to find the
                # left/right context).
                line = ('component name={0}.fixed-scale type=FixedScaleComponent scales={1}'.format(
                    self.name, presoftmax_scale_file))
                ans.append((config_name, line))
                line = ('component-node name={0}.fixed-scale component={0}.fixed-scale input={1}'.format(
                    self.name, cur_node))
                ans.append((config_name, line))
                cur_node = '{0}.fixed-scale'.format(self.name)
            if include_log_softmax:
                line = ('component name={0}.log-softmax type=LogSoftmaxComponent dim={1}'.format(
                    self.name, output_dim))
                ans.append((config_name, line))
                line = ('component-node name={0}.log-softmax component={0}.log-softmax input={1}'.format(
                    self.name, cur_node))
                ans.append((config_name, line))
                cur_node = '{0}.log-softmax'.format(self.name)
            # [fix: the original hard-coded input={0}.log-softmax here, which was
            # wrong whenever include-log-softmax=false (and dropped the
            # fixed-scale node in that case); use the tracked cur_node.]
            line = ('output-node name={0} input={1}'.format(self.name, cur_node))
            ans.append((config_name, line))
        return ans


# This class is for lines like
#  'relu-renorm-layer name=layer1 dim=1024 input=Append(-3,0,3)'
# or:
#  'sigmoid-layer name=layer1 dim=1024 input=Append(-3,0,3)'
# Here, the name of the layer itself dictates the sequence of nonlinearities
# that are applied; the name should contain some combination of 'relu', 'renorm',
# 'sigmoid' and 'tanh', and these nonlinearities will be added after the
# affine component.
#
# The dimension specified is the output dim; the input dim is worked out from
# the input descriptor.  This class supports only nonlinearity types that do
# not change the dimension; we can create another layer type to enable the use
# of p-norm and similar dimension-reducing nonlinearities.
#
# Parameters of the class, and their defaults:
#   input='[-1]'              [Descriptor giving the input of the layer.]
#   dim=-1                    [Output dimension of layer, e.g. 1024]
#   self-repair-scale=1.0e-05 [Affects relu, sigmoid and tanh layers.]
#
# Configuration values that we might one day want to add here, but which we
# don't yet have, include target-rms (affects 'renorm' component).
class XconfigSimpleLayer(XconfigLayerBase):
    # This class handles e.g. 'relu-renorm-layer name=layer1 dim=1024': an
    # affine component followed by the sequence of (dimension-preserving)
    # nonlinearities named in the layer type itself.
    def __init__(self, first_token, key_to_value, prev_names = None):
        # Here we just list some likely combinations.. you can just add any
        # combinations you want to use, to this list.
        assert first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer',
                                'tanh-layer' ]
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]', 'dim':-1, 'self-repair-scale':1.0e-05 }

    def CheckConfigs(self):
        if self.config['dim'] <= 0:
            raise RuntimeError("In {0}, dim has invalid value {1}".format(self.layer_type,
                                                                          self.config['dim']))
        if self.config['self-repair-scale'] < 0.0 or self.config['self-repair-scale'] > 1.0:
            # [fix: the original message said 'objective-type' and used {0}
            # twice, so the offending value was never printed.]
            raise RuntimeError("In {0}, self-repair-scale has invalid value {1}".format(
                self.layer_type, self.config['self-repair-scale']))

    def OutputName(self, qualifier = None):
        assert qualifier == None

        split_layer_name = self.layer_type.split('-')
        assert split_layer_name[-1] == 'layer'
        last_nonlinearity = split_layer_name[-2]
        # return something like: layer3.renorm
        return '{0}.{1}'.format(self.name, last_nonlinearity)

    def OutputDim(self, qualifier = None):
        return self.config['dim']

    def GetFullConfig(self):
        """Returns (config-file-basename, line) pairs for the affine component
        plus one component per nonlinearity named in the layer type."""
        ans = []

        split_layer_name = self.layer_type.split('-')
        assert split_layer_name[-1] == 'layer'
        nonlinearities = split_layer_name[:-1]

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        self_repair_scale = self.config['self-repair-scale']

        for config_name in [ 'ref', 'all' ]:
            # First the affine node.
            line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} '
                    'output-dim={2} '.format(self.name, input_dim, output_dim))
            ans.append((config_name, line))
            line = ('component-node name={0}.affine component={0}.affine input={1}'.format(
                self.name, descriptor_final_string))
            ans.append((config_name, line))
            cur_node = '{0}.affine'.format(self.name)

            for nonlinearity in nonlinearities:
                if nonlinearity == 'relu':
                    line = ('component name={0}.{1} type=RectifiedLinearComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'sigmoid':
                    line = ('component name={0}.{1} type=SigmoidComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'tanh':
                    line = ('component name={0}.{1} type=TanhComponent dim={2} '
                            'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim,
                                                           self_repair_scale))
                elif nonlinearity == 'renorm':
                    line = ('component name={0}.{1} type=NormalizeComponent dim={2} '.format(
                        self.name, nonlinearity, output_dim))
                else:
                    raise RuntimeError("Unknown nonlinearity type: {0}".format(nonlinearity))
                ans.append((config_name, line))
                line = 'component-node name={0}.{1} component={0}.{1} input={2}'.format(
                    self.name, nonlinearity, cur_node)
                ans.append((config_name, line))
                cur_node = '{0}.{1}'.format(self.name, nonlinearity)
        return ans


# This class is for lines like
#  'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'
#
# The output dimension of the layer may be specified via 'dim=xxx', but if not
# specified, the dimension defaults to the same as the input.  Note: we don't
# attempt to read that file at the time the config is created, because in the
# recipes, that file is created after the config files.
#
# Parameters of the class, and their defaults:
#   input='[-1]'             [Descriptor giving the input of the layer.]
#   dim=-1                   [Output dimension of layer; defaults to the same as the input dim.]
#   affine-transform-file='' [Must be specified.]
class XconfigFixedAffineLayer(XconfigLayerBase):
    def __init__(self, first_token, key_to_value, prev_names = None):
        assert first_token == 'fixed-affine-layer'
        XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names)

    def SetDefaultConfigs(self):
        # note: self.config['input'] is a descriptor, '[-1]' means output
        # the most recent layer.
        self.config = { 'input':'[-1]', 'dim':-1, 'affine-transform-file':'' }

    def CheckConfigs(self):
        if self.config['affine-transform-file'] == '':
            raise RuntimeError("In fixed-affine-layer, affine-transform-file must be set.")

    def OutputName(self, qualifier = None):
        assert qualifier == None
        return self.name

    def OutputDim(self, qualifier = None):
        output_dim = self.config['dim']
        # If not set, the output-dim defaults to the input-dim.
        if output_dim <= 0:
            output_dim = self.descriptors['input'][1]
        return output_dim

    def GetFullConfig(self):
        """Returns the config lines for this layer; note that init.config gets
        a temporary output-node used to accumulate LDA stats."""
        ans = []

        # note: each value of self.descriptors is (descriptor, dim,
        # normalized-string, output-string).
        # by 'descriptor_final_string' we mean a string that can appear in
        # config-files, i.e. it contains the 'final' names of nodes.
        descriptor_final_string = self.descriptors['input'][3]
        input_dim = self.descriptors['input'][1]
        output_dim = self.config['dim']
        transform_file = self.config['affine-transform-file']
        if output_dim <= 0:
            output_dim = input_dim


        # to init.config we write an output-node with the name 'output' and
        # with a Descriptor equal to the descriptor that's the input to this
        # layer.  This will be used to accumulate stats to learn the LDA transform.
        line = 'output-node name=output input={0}'.format(descriptor_final_string)
        ans.append(('init', line))

        # write the 'real' component to all.config
        line = 'component name={0} type=FixedAffineComponent matrix={1}'.format(
            self.name, transform_file)
        ans.append(('all', line))
        # write a random version of the component, with the same dims, to ref.config
        line = 'component name={0} type=FixedAffineComponent input-dim={1} output-dim={2}'.format(
            self.name, input_dim, output_dim)
        ans.append(('ref', line))
        # the component-node gets written to all.config and ref.config.
        line = 'component-node name={0} component={0} input={1}'.format(
            self.name, descriptor_final_string)
        ans.append(('all', line))
        ans.append(('ref', line))
        return ans

# Converts a line as parsed by ParseConfigLine() into a first
# token e.g. 'input-layer' and a key->value map, into
# an object inherited from XconfigLayerBase.
# 'prev_names' is a list of previous layer names; it's needed
# to parse things like '[-1]' (meaning: the previous layer)
# when they appear in Descriptors.
def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names):
    """Dispatches a parsed xconfig line (first token plus key->value map) to
    the appropriate XconfigLayerBase subclass and returns the constructed
    layer object; raises RuntimeError for an unknown layer type."""
    # table mapping the first token of an xconfig line to the class that
    # implements that layer type.
    layer_classes = {
        'input':              XconfigInputLayer,
        'output':             XconfigTrivialOutputLayer,
        'output-layer':       XconfigOutputLayer,
        'relu-layer':         XconfigSimpleLayer,
        'relu-renorm-layer':  XconfigSimpleLayer,
        'sigmoid-layer':      XconfigSimpleLayer,
        'tanh-layer':         XconfigSimpleLayer,
        'fixed-affine-layer': XconfigFixedAffineLayer }
    if first_token not in layer_classes:
        raise RuntimeError("Error parsing xconfig line (no such layer type): " +
                           first_token + ' ' +
                           ' '.join(['{0}={1}'.format(x,y) for x,y in key_to_value.items()]))
    layer_class = layer_classes[first_token]
    return layer_class(first_token, key_to_value, prev_names)


def ConfigLineToObject(config_line, prev_names = None):
    """Parses one raw xconfig line (via xconfig_utils.ParseConfigLine) and
    turns it into a layer object.  'prev_names' is a list of the names of
    preceding lines of the config file."""
    (first_token, key_to_value) = xconfig_utils.ParseConfigLine(config_line)
    return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names)



# This function reads an xconfig file and returns it as a list of layers
# (usually we use the variable name 'all_layers' elsewhere for this).
# It will die if the xconfig file is empty or if there was
# some error parsing it.
def ReadXconfigFile(xconfig_filename):
    """Reads the xconfig file 'xconfig_filename' and returns it as a list of
    layer objects (usually called 'all_layers' elsewhere).  Calls sys.exit()
    if the file cannot be opened, and raises RuntimeError if it is empty or
    if a line fails to parse."""
    try:
        f = open(xconfig_filename, 'r')
    except Exception as e:
        sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format(
            sys.argv[0], xconfig_filename, repr(e)))
    all_layers = []
    # [fix: the original leaked the file handle when the empty-file error was
    # raised before f.close(); the finally-clause closes it on every path.]
    try:
        for line in f:
            x = xconfig_utils.ParseConfigLine(line)
            if x is None:
                continue  # line was blank or only comments.
            (first_token, key_to_value) = x
            # the next call will raise an easy-to-understand exception if
            # it fails.
            this_layer = ParsedLineToXconfigLayer(first_token,
                                                  key_to_value,
                                                  all_layers)
            all_layers.append(this_layer)
    finally:
        f.close()
    if len(all_layers) == 0:
        raise RuntimeError("{0}: xconfig file '{1}' is empty".format(
            sys.argv[0], xconfig_filename))
    return all_layers


def TestLayers():
    """Self-test: config lines that should round-trip unchanged through
    parsing and str()."""
    # for some config lines that should be printed the same way as they
    # are read, check that this is the case.
    for x in [ 'input name=input dim=30' ]:
        assert str(ConfigLineToObject(x, [])) == x


# ---------------------------------------------------------------------------
# egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py
#
# Copyright 2016  Johns Hopkins University (Author: Daniel Povey).
# License: Apache 2.0.
#
# This library contains various utilities that are involved in the processing
# of xconfig -> config conversion.  It contains "generic" lower-level code
# while xconfig_layers.py contains the code specific to layer types.
from __future__ import print_function
import subprocess
import logging
import math
import re
import sys
import traceback
import time
import argparse


# [utility function used in xconfig_layers.py]
# Given a list of objects of type XconfigLayerBase ('all_layers'),
# including at least the layers preceding 'current_layer' (and maybe
# more layers), return the names of layers preceding 'current_layer'.
# This will be used in parsing expressions like [-1] in descriptors
# (which is an alias for the previous layer).
def GetPrevNames(all_layers, current_layer):
    """Return the names (strings) of the layers preceding 'current_layer'
    in 'all_layers'.  Raises RuntimeError if a layer name is duplicated,
    since that would make name lookup ambiguous."""
    prev_names = []
    for layer in all_layers:
        if layer is current_layer:
            break
        prev_names.append(layer.Name())
    prev_names_set = set()
    for name in prev_names:
        if name in prev_names_set:
            raise RuntimeError("{0}: Layer name {1} is used more than once.".format(
                sys.argv[0], name))
        prev_names_set.add(name)
    return prev_names


# [utility function used in xconfig_layers.py]
# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like
# 'lstm2.memory_cell', into a dimension.  'all_layers' is a vector of objects
# inheriting from XconfigLayerBase.  'current_layer' is provided so that the
# function can make sure not to look in layers that appear *after* this layer
# (because that's not allowed).
def GetDimFromLayerName(all_layers, current_layer, full_layer_name):
    """Return the output dimension of layer (or sub-layer) 'full_layer_name',
    searching only the layers that precede 'current_layer'."""
    assert isinstance(full_layer_name, str)
    split_name = full_layer_name.split('.')
    if len(split_name) == 0:
        raise RuntimeError("Bad layer name: " + full_layer_name)
    layer_name = split_name[0]
    if len(split_name) == 1:
        qualifier = None
    else:
        # we probably expect len(split_name) == 2 in this case,
        # but no harm in allowing dots in the qualifier.
        qualifier = '.'.join(split_name[1:])

    for layer in all_layers:
        if layer is current_layer:
            break
        if layer.Name() == layer_name:
            if not qualifier in layer.Qualifiers():
                raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format(
                    layer_name, qualifier))
            return layer.OutputDim(qualifier)
    # No such layer was found.
    if layer_name in [ layer.Name() for layer in all_layers ]:
        raise RuntimeError("Layer '{0}' was requested before it appeared in "
                           "the xconfig file (circular dependencies or out-of-order "
                           "layers".format(layer_name))
    else:
        raise RuntimeError("No such layer: '{0}'".format(layer_name))


# [utility function used in xconfig_layers.py]
# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like
# 'lstm2.memory_cell', into a descriptor (usually, but not required to be, a
# simple component-node name) that can appear in the generated config file.
# 'all_layers' is a vector of objects inheriting from XconfigLayerBase.
# 'current_layer' is provided so that the function can make sure not to look
# in layers that appear *after* this layer (because that's not allowed).
def GetStringFromLayerName(all_layers, current_layer, full_layer_name):
    """Return the config-file string (component-node name or descriptor) for
    layer (or sub-layer) 'full_layer_name', searching only the layers that
    precede 'current_layer'."""
    assert isinstance(full_layer_name, str)
    split_name = full_layer_name.split('.')
    if len(split_name) == 0:
        raise RuntimeError("Bad layer name: " + full_layer_name)
    layer_name = split_name[0]
    if len(split_name) == 1:
        qualifier = None
    else:
        # we probably expect len(split_name) == 2 in this case,
        # but no harm in allowing dots in the qualifier.
        qualifier = '.'.join(split_name[1:])

    for layer in all_layers:
        if layer is current_layer:
            break
        if layer.Name() == layer_name:
            if not qualifier in layer.Qualifiers():
                raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format(
                    layer_name, qualifier))
            return layer.OutputName(qualifier)
    # No such layer was found.
    if layer_name in [ layer.Name() for layer in all_layers ]:
        raise RuntimeError("Layer '{0}' was requested before it appeared in "
                           "the xconfig file (circular dependencies or out-of-order "
                           "layers".format(layer_name))
    else:
        raise RuntimeError("No such layer: '{0}'".format(layer_name))


# This function, used in converting string values in config lines to
# configuration values in self.config in layers, attempts to
# convert 'string_value' to an instance of dest_type (which is of type Type).
# 'key' is only needed for printing errors.
def ConvertValueToType(key, dest_type, string_value):
    """Convert 'string_value' to an instance of 'dest_type' (one of bool,
    int, float or str).  Raises an Exception if the value cannot be
    converted, or if 'dest_type' is not one of the supported types."""
    if dest_type == type(bool()):
        if string_value == "True" or string_value == "true":
            return True
        elif string_value == "False" or string_value == "false":
            return False
        else:
            raise Exception("Invalid configuration value {0}={1} (expected bool)".format(
                key, string_value))
    elif dest_type == type(int()):
        try:
            return int(string_value)
        except (TypeError, ValueError):
            raise Exception("Invalid configuration value {0}={1} (expected int)".format(
                key, string_value))
    elif dest_type == type(float()):
        try:
            return float(string_value)
        except (TypeError, ValueError):
            # BUG FIX: this message previously said 'expected int'.
            raise Exception("Invalid configuration value {0}={1} (expected float)".format(
                key, string_value))
    elif dest_type == type(str()):
        return string_value
    else:
        # BUG FIX: previously an unsupported dest_type fell off the end of
        # the function and silently returned None.
        raise Exception("Unsupported type {0} for configuration value {1}".format(
            dest_type, key))



# This class parses and stores a Descriptor-- expression
# like Append(Offset(input, -3), input) and so on.
# For the full range of possible expressions, see the comment at the
# top of src/nnet3/nnet-descriptor.h.
# Note: as an extension to the descriptor format used in the C++
# code, we can have e.g. input@-3 meaning Offset(input, -3);
# and if bare integer numbers appear where a descriptor was expected,
# they are interpreted as Offset(prev_layer, -3) where 'prev_layer'
# is the previous layer in the config file.
# Also, in any place a raw input/layer/output name can appear, we accept things
# like [-1] meaning the previous input/layer/output's name, or [-2] meaning the
# last-but-one input/layer/output, and so on.
class Descriptor:
    """Parses and stores a Descriptor -- an expression like
    Append(Offset(input, -3), input).  For the full range of possible
    expressions, see the comment at the top of src/nnet3/nnet-descriptor.h."""

    def __init__(self,
                 descriptor_string = None,
                 prev_names = None):
        # self.operator is a string that may be 'Offset', 'Append',
        # 'Sum', 'Failover', 'IfDefined', 'Switch', 'Round',
        # 'ReplaceIndex'; it also may be None, representing the base-case
        # (where it's just a layer name).
        #
        # self.items will be whatever items are inside the parentheses,
        # e.g. if this is Sum(foo bar), then items will be [d1, d2], where
        # d1 is a Descriptor for 'foo' and d2 is a Descriptor for 'bar'.
        # However, there are cases where elements of self.items are strings
        # or integers, for instance in an expression
        # 'ReplaceIndex(ivector, x, 0)', self.items would be [d, 'x', 0],
        # where d is a Descriptor for 'ivector'.  In the case where
        # self.operator is None (where this Descriptor represents just a
        # bare layer name), self.items contains the name of the input layer
        # as a string.
        self.operator = None
        self.items = None

        if descriptor_string != None:
            try:
                tokens = TokenizeDescriptor(descriptor_string, prev_names)
                pos = 0
                (d, pos) = ParseNewDescriptor(tokens, pos, prev_names)
                # note: 'pos' should point to the 'end of string' marker
                # that terminates 'tokens'.
                if pos != len(tokens) - 1:
                    raise Exception("Parsing Descriptor, saw junk at end: " +
                                    ' '.join(tokens[pos:-1]))
                # copy members from d.
                self.operator = d.operator
                self.items = d.items
            except Exception as e:
                traceback.print_tb(sys.exc_info()[2])
                raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format(
                    descriptor_string, repr(e)))

    # This is like the str() function, but it uses the layer_to_string function
    # (which is a function from strings to strings) to convert layer names (or
    # in general sub-layer names of the form 'foo.bar') to the component-node
    # (or, in general, descriptor) names that appear in the final config file.
    # This mechanism gives those designing layer types the freedom to name
    # their nodes as they want.
    def ConfigString(self, layer_to_string):
        if self.operator is None:
            assert len(self.items) == 1 and isinstance(self.items[0], str)
            return layer_to_string(self.items[0])
        else:
            assert isinstance(self.operator, str)
            return self.operator + '(' + ', '.join(
                [ item.ConfigString(layer_to_string) if isinstance(item, Descriptor) else str(item)
                  for item in self.items]) + ')'

    def str(self):
        """Return the normalized string form, e.g. 'Sum(foo, bar)'."""
        if self.operator is None:
            assert len(self.items) == 1 and isinstance(self.items[0], str)
            return self.items[0]
        else:
            assert isinstance(self.operator, str)
            return self.operator + '(' + ', '.join([str(item) for item in self.items]) + ')'

    def __str__(self):
        return self.str()

    # This function returns the dimension (i.e. the feature dimension) of the
    # descriptor.  It takes 'layer_to_dim' which is a function from
    # layer-names (including sub-layer names, like lstm1.memory_cell) to
    # dimensions, e.g. you might have layer_to_dim('ivector') = 100, or
    # layer_to_dim('affine1') = 1024.
    # note: layer_to_dim will raise an exception if a nonexistent layer or
    # sub-layer is requested.
    def Dim(self, layer_to_dim):
        if self.operator is None:
            # base-case: self.items = [ layer_name ] (or sub-layer name, like
            # 'lstm.memory_cell').
            return layer_to_dim(self.items[0])
        elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]:
            # these are all operators for which all args are descriptors
            # and must have the same dim.
            dim = self.items[0].Dim(layer_to_dim)
            for desc in self.items[1:]:
                next_dim = desc.Dim(layer_to_dim)
                if next_dim != dim:
                    raise Exception("In descriptor {0}, different fields have different "
                                    "dimensions: {1} != {2}".format(self.str(), dim, next_dim))
            return dim
        elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]:
            # for these operators, only the 1st arg is relevant.
            return self.items[0].Dim(layer_to_dim)
        elif self.operator == 'Append':
            return sum([ x.Dim(layer_to_dim) for x in self.items])
        else:
            raise Exception("Unknown operator {0}".format(self.operator))



# This just checks that seen_item == expected_item, and raises an
# exception if not.
def ExpectToken(expected_item, seen_item, what_parsing):
    if seen_item != expected_item:
        raise Exception("parsing {0}, expected '{1}' but got '{2}'".format(
            what_parsing, expected_item, seen_item))


# returns true if 'name' is valid as the name of a line (input, layer or output);
# this is the same as IsValidName() in the nnet3 code.
def IsValidLineName(name):
    # BUG FIX: the pattern previously had no '$' anchor, so any string with a
    # valid *prefix* (e.g. 'foo$bar') was accepted; the nnet3 IsValidName()
    # validates the whole string.
    return isinstance(name, str) and re.match(r'^[a-zA-Z_][-a-zA-Z_0-9.]*$', name) != None


# This function for parsing Descriptors takes an array of tokens as produced
# by TokenizeDescriptor.  It parses a descriptor
# starting from position pos >= 0 of the array 'tokens', and
# returns a new position in the array that reflects any tokens consumed while
# parsing the descriptor.
# It returns a pair (d, pos) where d is the newly parsed Descriptor,
# and 'pos' is the new position after consuming the relevant input.
# 'prev_names' is so that we can find the most recent layer name for
# expressions like Append(-3, 0, 3) which is shorthand for the most recent
# layer spliced at those time offsets.
def ParseNewDescriptor(tokens, pos, prev_names):
    """Parse one Descriptor starting at position pos >= 0 of 'tokens' (as
    produced by TokenizeDescriptor).  Returns a pair (d, pos) where d is the
    newly parsed Descriptor and 'pos' is the position after consuming the
    relevant input.  'prev_names' is so that we can find the most recent
    layer name for expressions like Append(-3, 0, 3), which is shorthand for
    the most recent layer spliced at those time offsets."""
    size = len(tokens)
    first_token = tokens[pos]
    pos += 1
    d = Descriptor()

    # when reading this function, be careful to note the indent level,
    # there is an if-statement within an if-statement.
    if first_token in [ 'Offset', 'Round', 'ReplaceIndex', 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]:
        ExpectToken('(', tokens[pos], first_token + '()')
        pos += 1
        d.operator = first_token
        # the 1st argument of all these operators is a Descriptor.
        (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names)
        d.items = [desc]

        if first_token == 'Offset':
            ExpectToken(',', tokens[pos], 'Offset()')
            pos += 1
            try:
                t_offset = int(tokens[pos])
                pos += 1
                d.items.append(t_offset)
            except:
                raise Exception("Parsing Offset(), expected integer, got " + tokens[pos])
            # Offset() may have either one or two integer arguments.
            if tokens[pos] == ')':
                return (d, pos + 1)
            elif tokens[pos] != ',':
                raise Exception("Parsing Offset(), expected ')' or ',', got " + tokens[pos])
            pos += 1
            try:
                x_offset = int(tokens[pos])
                pos += 1
                d.items.append(x_offset)
            except:
                raise Exception("Parsing Offset(), expected integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'Offset()')
            pos += 1
        elif first_token in [ 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]:
            while True:
                if tokens[pos] == ')':
                    # check num-items is correct for some special cases.
                    if first_token == 'Failover' and len(d.items) != 2:
                        raise Exception("Parsing Failover(), expected 2 items but got {0}".format(len(d.items)))
                    if first_token == 'IfDefined' and len(d.items) != 1:
                        raise Exception("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items)))
                    pos += 1
                    break
                elif tokens[pos] == ',':
                    pos += 1  # consume the comma.
                else:
                    # BUG FIX: the message previously always said 'Append()',
                    # even when parsing Sum(), Switch(), etc.
                    raise Exception("Parsing {0}(), expected ')' or ',', got ".format(
                        first_token) + tokens[pos])

                (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names)
                d.items.append(desc)
        elif first_token == 'Round':
            ExpectToken(',', tokens[pos], 'Round()')
            pos += 1
            try:
                t_modulus = int(tokens[pos])
                assert t_modulus > 0
                pos += 1
                d.items.append(t_modulus)
            except:
                # BUG FIX: this message previously said 'Parsing Offset()'.
                raise Exception("Parsing Round(), expected positive integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'Round()')
            pos += 1
        elif first_token == 'ReplaceIndex':
            ExpectToken(',', tokens[pos], 'ReplaceIndex()')
            pos += 1
            if tokens[pos] in [ 'x', 't' ]:
                d.items.append(tokens[pos])
                pos += 1
            else:
                raise Exception("Parsing ReplaceIndex(), expected 'x' or 't', got " +
                                tokens[pos])
            ExpectToken(',', tokens[pos], 'ReplaceIndex()')
            pos += 1
            try:
                new_value = int(tokens[pos])
                pos += 1
                d.items.append(new_value)
            except:
                # BUG FIX: this message previously said 'Parsing Offset()'.
                raise Exception("Parsing ReplaceIndex(), expected integer, got " + tokens[pos])
            ExpectToken(')', tokens[pos], 'ReplaceIndex()')
            pos += 1
        else:
            raise Exception("code error")
    elif first_token in [ 'end of string', '(', ')', ',', '@' ]:
        raise Exception("Expected descriptor, got " + first_token)
    elif IsValidLineName(first_token) or first_token == '[':
        # This section parses a raw input/layer/output name, e.g. "affine2"
        # (which must start with an alphabetic character or underscore),
        # optionally followed by an offset like '@-3'.

        d.operator = None
        d.items = [first_token]

        # If the layer-name is followed by '@', then
        # we're parsing something like 'affine1@-3' which
        # is syntactic sugar for 'Offset(affine1, -3)'.
        if tokens[pos] == '@':
            pos += 1
            try:
                offset_t = int(tokens[pos])
                pos += 1
            except:
                raise Exception("Parse error parsing {0}@{1}".format(
                    first_token, tokens[pos]))
            if offset_t != 0:
                inner_d = d
                d = Descriptor()
                # e.g. foo@3 is equivalent to 'Offset(foo, 3)'.
                d.operator = 'Offset'
                d.items = [ inner_d, offset_t ]
    else:
        # the last possible case is that 'first_token' is just an integer i,
        # which can appear in things like Append(-3, 0, 3).
        # See if the token is an integer.
        # In this case, it's interpreted as the name of previous layer
        # (with that time offset applied).
        try:
            offset_t = int(first_token)
        except:
            raise Exception("Parsing descriptor, expected descriptor but got " +
                            first_token)
        assert isinstance(prev_names, list)
        if len(prev_names) < 1:
            raise Exception("Parsing descriptor, could not interpret '{0}' because "
                            "there is no previous layer".format(first_token))
        d.operator = None
        # the layer name is the name of the most recent layer.
        d.items = [prev_names[-1]]
        if offset_t != 0:
            inner_d = d
            d = Descriptor()
            d.operator = 'Offset'
            d.items = [ inner_d, offset_t ]
    return (d, pos)


# This function takes a string 'descriptor_string' which might
# look like 'Append([-1], [-2], input)', and a list of previous layer
# names like prev_names = ['foo', 'bar', 'baz'], and replaces
# the integers in brackets with the previous layers.  -1 means
# the most recent previous layer ('baz' in this case), -2
# means the last layer but one ('bar' in this case), and so on.
# It will throw an exception if the number is out of range.
# If there are no such expressions in the string, it's OK if
# prev_names == None (this is useful for testing).
def ReplaceBracketExpressionsInDescriptor(descriptor_string,
                                          prev_names = None):
    """Replace bracketed indexes like '[-1]' in 'descriptor_string' with the
    corresponding entries of 'prev_names' ([-1] is the most recent previous
    layer, [-2] the last layer but one, and so on).  Raises an exception if
    an index is out of range or a bracket is unmatched."""
    fields = re.split(r'(\[|\])\s*', descriptor_string)
    out_fields = []
    i = 0
    while i < len(fields):
        f = fields[i]
        i += 1
        if f == ']':
            raise Exception("Unmatched ']' in descriptor")
        elif f == '[':
            if i + 2 >= len(fields):
                raise Exception("Error tokenizing string '{0}': '[' found too close "
                                "to the end of the descriptor.".format(descriptor_string))
            assert isinstance(prev_names, list)
            try:
                offset = int(fields[i])
                assert offset < 0 and -offset <= len(prev_names)
                i += 2  # consume the int and the ']'.
            except:
                raise Exception("Error tokenizing string '{0}': expression [{1}] has an "
                                "invalid or out of range offset.".format(descriptor_string, fields[i]))
            this_field = prev_names[offset]
            out_fields.append(this_field)
        else:
            out_fields.append(f)
    return ''.join(out_fields)



# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors.
# Note: for convenience in parsing, we add the token 'end of string' to this
# list.
# The argument 'prev_names' (for the names of previous layers and input and
# output nodes) is needed to process expressions like [-1] meaning the most
# recent layer, or [-2] meaning the last layer but one.
# The default None for prev_names is only supplied for testing purposes.
def TokenizeDescriptor(descriptor_string,
                       prev_names = None):
    # split on '(', ')', ',', '@', and space.  Note: the parenthesis () in the
    # regexp causes it to output the stuff inside the () as if it were a field,
    # which is how the call to re.split() keeps characters like '(' and ')' as
    # tokens.
    fields = re.split(r'(\(|\)|@|,|\s)\s*',
                      ReplaceBracketExpressionsInDescriptor(descriptor_string,
                                                           prev_names))
    ans = []
    for f in fields:
        # don't include fields that are space, or are empty.
        if re.match(r'^\s*$', f) is None:
            ans.append(f)

    ans.append('end of string')
    return ans


# This function parses a line in a config file, something like
#   affine-layer name=affine1 input=Append(-3, 0, 3)
# and returns a pair,
# (first_token, fields), as (string, dict) e.g. in this case
# ('affine-layer', {'name':'affine1', 'input':'Append(-3, 0, 3)"
# Note: spaces are allowed in the field values but = signs are
# disallowed, which is why it's possible to parse them.
# This function also removes comments (anything after '#').
# As a special case, this function will return None if the line
# is empty after removing spaces.
def ParseConfigLine(orig_config_line):
    # Remove comments.
    # note: splitting on '#' will always give at least one field...  python
    # treats splitting on space as a special case that may give zero fields.
    config_line = orig_config_line.split('#')[0]
    # Now split on space; later we may splice things back together.
    fields = config_line.split()
    if len(fields) == 0:
        return None   # Line was only whitespace after removing comments.
    first_token = fields[0]
    # if first_token does not look like 'foo-bar' or 'foo-bar2', then die.
    if re.match('^[a-z][-a-z0-9]+$', first_token) is None:
        raise Exception("Error parsing config line (first field doesn't look right): {0}".format(
            orig_config_line))
    # get rid of the first field which we put in 'first_token'.
    fields = fields[1:]

    rest_of_line = ' '.join(fields)

    # suppose rest_of_line is: 'input=Append(foo, bar) foo=bar'
    # then after the below we'll get
    # fields = ['', 'input', 'Append(foo, bar)', 'foo', 'bar']
    fields = re.split(r'\s*([-a-zA-Z0-9_]*)=', rest_of_line)
    if not (fields[0] == '' and len(fields) % 2 == 1):
        raise Exception("Could not parse config line: " + orig_config_line)
    fields = fields[1:]
    # BUG FIX: use integer division; under python 3, '/' yields a float and
    # the call to range() below would raise TypeError.
    num_variables = len(fields) // 2
    ans_dict = dict()
    for i in range(num_variables):
        var_name = fields[i * 2]
        var_value = fields[i * 2 + 1]
        if re.match(r'[a-zA-Z_]', var_name) is None:
            raise Exception("Expected variable name '{0}' to start with alphabetic character or _, "
                            "in config line {1}".format(var_name, orig_config_line))
        if var_name in ans_dict:
            raise Exception("Config line has multiply defined variable {0}: {1}".format(
                var_name, orig_config_line))
        ans_dict[var_name] = var_value
    return (first_token, ans_dict)


# Reads a config file and returns a list of objects, where each object
# represents one line of the file.
+def ReadConfigFile(filename): + try: + f = open(filename, "r") + except Exception as e: + raise Exception("Error reading config file {0}: {1}".format( + filename, repr(e))) + ans = [] + prev_names = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(line) + if x is None: + continue # blank line + (first_token, key_to_value) = x + layer_object = ConfigLineToObject(first_token, key_to_value, prev_names) + ans.append(layer_object) + prev_names.append(layer_object.Name()) + + +def TestLibrary(): + TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' + assert TokenizeTest("hi") == ['hi'] + assert TokenizeTest("hi there") == ['hi', 'there'] + assert TokenizeTest("hi,there") == ['hi', ',', 'there'] + assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] + assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] + assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] + assert TokenizeDescriptor("[-2].special@2", ['foo', 'bar'])[:-1] == ['foo.special', '@', '2' ] + + assert Descriptor('foo').str() == 'foo' + assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' + assert Descriptor('Sum(Offset(foo,1),Offset(foo,0))').str() == 'Sum(Offset(foo, 1), Offset(foo, 0))' + for x in [ 'Append(foo, Sum(bar, Offset(baz, 1)))', 'Failover(foo, Offset(bar, -1))', + 'IfDefined(Round(baz, 3))', 'Switch(foo1, Offset(foo2, 2), Offset(foo3, 3))', + 'IfDefined(ReplaceIndex(ivector, t, 0))', 'ReplaceIndex(foo, x, 0)' ]: + if not Descriptor(x).str() == x: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), x)) + + prev_names = ['last_but_one_layer', 'prev_layer'] + for x, y in [ ('Sum(foo,bar)', 'Sum(foo, bar)'), + ('Sum(foo1,bar-3_4)', 'Sum(foo1, bar-3_4)'), + ('Append(input@-3, input@0, input@3)', + 'Append(Offset(input, -3), input, Offset(input, 3))'), + ('Append(-3,0,3)', + 'Append(Offset(prev_layer, -3), prev_layer, Offset(prev_layer, 3))'), + ('[-1]', 'prev_layer'), + 
('[-2]', 'last_but_one_layer'), + ('[-2]@3', + 'Offset(last_but_one_layer, 3)') ]: + if not Descriptor(x, prev_names).str() == y: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + + + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) + + print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) + print(ParseConfigLine('affine-layer')) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py new file mode 100755 index 00000000000..bd841aae1f2 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import os +import argparse +import shlex +import sys +import warnings +import copy +import imp +import ast +from collections import defaultdict + +sys.path.insert(0, 'steps/nnet3/libs/') +# the following is in case we weren't running this from the normal directory. 
sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/')

import xconfig_utils
import xconfig_layers


def GetArgs():
    """Parse the command-line arguments; returns the checked args object."""
    # we add compulsary arguments as named arguments for readability
    parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files '
                                     'for neural net creation and training',
                                     epilog='Search egs/*/*/local/nnet3/*sh for examples')
    parser.add_argument('xconfig_file',
                        help='Filename of input xconfig file')
    parser.add_argument('config_dir',
                        help='Directory to write config files and variables')

    print(' '.join(sys.argv))

    args = parser.parse_args()
    args = CheckArgs(args)

    return args


def CheckArgs(args):
    """Validate the parsed arguments; creates config_dir if missing."""
    if not os.path.exists(args.config_dir):
        os.makedirs(args.config_dir)
    return args


# NOTE(review): kept as a reference for a 'vars' file that other scripts
# (e.g. steps/nnet3/get_egs.sh) will eventually need; not yet implemented.
# # write the files used by other scripts like steps/nnet3/get_egs.sh
# f = open(config_dir + 'vars', 'w')
# print('model_left_context=' + str(left_context), file=f)
# print('model_right_context=' + str(right_context), file=f)
# print('num_hidden_layers=' + str(num_hidden_layers), file=f)
# print('num_targets=' + str(num_targets), file=f)
# print('add_lda=' + ('true' if add_lda else 'false'), file=f)
# print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f)
# print('objective_type=' + objective_type, file=f)
# f.close()



def BackUpXconfigFile(xconfig_file, config_dir):
    """Write a copy of the xconfig file to <config_dir>/xconfig, just to
    have a record of the original input."""
    try:
        xconfig_file_out = open(config_dir + '/xconfig', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig for output'.format(
            sys.argv[0], config_dir))
    try:
        xconfig_file_in = open(xconfig_file)
    except:
        # BUG FIX: this message previously formatted config_dir instead of
        # the input filename that failed to open.
        sys.exit('{0}: error opening file {1} for input'.format(sys.argv[0], xconfig_file))

    print("# This file was created by the command:\n"
          "# {0}\n"
          "# It is a copy of the source from which the config files in "
          "# this directory were generated.\n".format(' '.join(sys.argv)),
          file=xconfig_file_out)

    while True:
        line = xconfig_file_in.readline()
        if line == '':
            break
        print(line.strip(), file=xconfig_file_out)
    xconfig_file_out.close()
    xconfig_file_in.close()


# This functions writes config_dir/xconfig.expanded.1 and
# config_dir/xconfig.expanded.2, showing some of the internal stages of
# processing the xconfig file before turning it into config files.
def WriteExpandedXconfigFiles(config_dir, all_layers):
    try:
        xconfig_file_out = open(config_dir + '/xconfig.expanded.1', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig.expanded.1 for output'.format(
            sys.argv[0], config_dir))

    print('# This file was created by the command:\n'
          '# ' + ' '.join(sys.argv) + '\n'
          '#It contains the same content as ./xconfig but it was parsed and\n'
          '#default config values were set.\n'
          '# See also ./xconfig.expanded.2\n', file=xconfig_file_out)

    for layer in all_layers:
        print(str(layer), file=xconfig_file_out)
    xconfig_file_out.close()

    try:
        xconfig_file_out = open(config_dir + '/xconfig.expanded.2', 'w')
    except:
        sys.exit('{0}: error opening file {1}/xconfig.expanded.2 for output'.format(
            sys.argv[0], config_dir))

    print('# This file was created by the command:\n'
          '# ' + ' '.join(sys.argv) + '\n'
          '# It contains the same content as ./xconfig but it was parsed,\n'
          '# default config values were set, and Descriptors (input=xxx) were normalized.\n'
          '# See also ./xconfig.expanded.1\n\n',
          file=xconfig_file_out)

    for layer in all_layers:
        layer.NormalizeDescriptors()
        print(str(layer), file=xconfig_file_out)
    xconfig_file_out.close()




# This function returns a map from config-file basename
# e.g. 'init', 'ref', 'layer1' to a documentation string that goes
# at the top of the file.
def GetConfigHeaders():
    # resulting dict will default to the empty string for any config files
    # not explicitly listed here.
    ans = defaultdict(str)
    ans['init'] = ('# This file was created by the command:\n'
                   '# ' + ' '.join(sys.argv) + '\n'
                   '# It contains the input of the network and is used in\n'
                   '# accumulating stats for an LDA-like transform of the\n'
                   '# input features.\n')
    ans['ref'] = ('# This file was created by the command:\n'
                  '# ' + ' '.join(sys.argv) + '\n'
                  '# It contains the entire neural network, but with those\n'
                  '# components that would normally require fixed vectors/matrices\n'
                  '# read from disk, replaced with random initialization\n'
                  '# (this applies to the LDA-like transform and the\n'
                  '# presoftmax-prior-scale, if applicable). This file\n'
                  '# is used only to work out the left-context and right-context\n'
                  '# of the network.\n')
    ans['all'] = ('# This file was created by the command:\n'
                  '# ' + ' '.join(sys.argv) + '\n'
                  '# It contains the entire neural network. It might not be used\n'
                  '# in the current scripts; it\'s provided for forward compatibility\n'
                  '# to possible future changes.\n')

    # Note: currently we just copy all lines that were going to go to 'all', into
    # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py
    ans['layer1'] = ('# This file was created by the command:\n'
                     '# ' + ' '.join(sys.argv) + '\n'
                     '# It contains the configuration of the entire neural network.\n'
                     '# The contents are the same\n'
                     '# as \'all.config\'. The reason this file is named this way (and\n'
                     '# that the config file `num_hidden_layers` contains 1, even though\n'
                     '# this file may really contain more than 1 hidden layer), is\n'
                     '# historical... we used to create networks by adding hidden layers\n'
                     '# one by one (discriminative pretraining), but more recently we\n'
                     '# have found that it\'s better to add them all at once. This file\n'
                     '# exists to enable the older training scripts to work. Note:\n'
                     '# it contains the inputs of the neural network even though it doesn\'t\n'
                     '# have to (since they are included in \'init.config\'). This will\n'
                     '# give us the flexibility to change the scripts in future.\n')
    return ans




# This is where most of the work of this program happens.
def WriteConfigFiles(config_dir, all_layers):
    # config_basename_to_lines is map from the basename of the
    # config, as a string (i.e. 'ref', 'all', 'init') to a list of
    # strings representing lines to put in the config file.
    config_basename_to_lines = defaultdict(list)

    config_basename_to_header = GetConfigHeaders()

    for layer in all_layers:
        try:
            pairs = layer.GetFullConfig()
            for config_basename, line in pairs:
                config_basename_to_lines[config_basename].append(line)
        except Exception as e:
            print('{0}: error producing config lines from xconfig '
                  'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer),
                                                        repr(e)), file=sys.stderr)
            # bare 'raise' preserves the original traceback (raise(e) did not).
            raise

    # currently we don't expect any of the GetFullConfig functions to output to
    # config-basename 'layer1'... currently we just copy this from
    # config-basename 'all', for back-compatibility to older scripts.
    assert not 'layer1' in config_basename_to_lines
    config_basename_to_lines['layer1'] = config_basename_to_lines['all']

    for basename, lines in config_basename_to_lines.items():
        header = config_basename_to_header[basename]
        filename = '{0}/{1}.config'.format(config_dir, basename)
        try:
            f = open(filename, 'w')
            print(header, file=f)
            for line in lines:
                print(line, file=f)
            f.close()
        except Exception as e:
            print('{0}: error writing to config file {1}: error is {2}'.format(
                sys.argv[0], filename, repr(e)), file=sys.stderr)
            # bare 'raise' preserves the original traceback.
            raise




def Main():
    """Top-level driver: parse args, back up the xconfig, then write the
    expanded xconfig files and the generated .config files."""
    args = GetArgs()
    BackUpXconfigFile(args.xconfig_file, args.config_dir)
    all_layers = xconfig_layers.ReadXconfigFile(args.xconfig_file)
    WriteExpandedXconfigFiles(args.config_dir, all_layers)
    WriteConfigFiles(args.config_dir, all_layers)



if __name__ == '__main__':
    Main()


# test:
# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo
# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo

# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo

# mkdir -p foo; (echo 'input dim=100 name=ivector'; echo 'input dim=40 name=input'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo