From 0ecfe9069d0936b4d703729c8ec2d827b48ca76f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 26 Oct 2016 23:04:31 -0400 Subject: [PATCH 01/12] Adding early draft of xconfig library --- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 483 +++++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py new file mode 100644 index 00000000000..437be386c90 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -0,0 +1,483 @@ +from __future__ import print_function +import subprocess +import logging +import math +import re +import sys +import traceback +import time +import argparse + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)s - %(funcName)s - %(levelname)s ] %(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + + +class StrToBoolAction(argparse.Action): + """ A custom action to convert bools from shell format i.e., true/false + to python format i.e., True/False """ + def __call__(self, parser, namespace, values, option_string=None): + if values == "true": + setattr(namespace, self.dest, True) + elif values == "false": + setattr(namespace, self.dest, False) + else: + raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) + +class NullstrToNoneAction(argparse.Action): + """ A custom action to convert empty strings passed by shell + to None in python. This is necessary as shell scripts print null strings + when a variable is not specified. We could use the more apt None + in python. """ + def __call__(self, parser, namespace, values, option_string=None): + if values.strip() == "": + setattr(namespace, self.dest, None) + else: + setattr(namespace, self.dest, values) + + +# This class represents a line that starts with 'input', e.g. +# 'input name=ivector dim=100', or 'input name=input dim=40' +class XconfigInputLine: + # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. + def __init__(self, key_to_value): + if not 'name' in key_to_value: + raise Exception("Config line for input does not specify name.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Name '{0}' is not a valid node name.".format(self.name)) + if not 'dim' in key_to_value: + raise Exception("Config line for input does not specify dimension.") + try: + self.dim = int(key_to_value['dim']) + assert self.dim > 0 + except: + raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) + + # This returns the name of the layer. + def Name(): + return self.name + + # This returns the name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.relu'. + def OutputName(): + return self.name + + # note: layers have a function InputDim() also, so we call this dimension function + # OutputDim(). + def OutputDim(): + return self.dim + + +# A base-class for classes representing lines of xconfig files. +# This handles the +class XconfigLineBase: + def __init__(self): + pass + + def Name(): + return self.name + + def SetDims(): + raise Exception("SetDims() not implemented for this class") + + + + + +# This class parses and stores a Descriptor-- expression +# like Append(Offset(input, -3), input) and so on. 
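+# For example, 'Append(Offset(input, -3), input, Offset(input, 3))' splices the
+# 'input' node together at time offsets -3, 0 and 3, and
+# 'Sum(Offset(foo, 1), Offset(foo, 0))' sums two time-shifted copies of 'foo'.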
+# For the full range of possible expressions, see the comment at the +# top of src/nnet3/nnet-descriptor.h. +# Note: as an extension to the descriptor format used in the C++ +# code, we can have e.g. input@-3 meaning Offset(input, -3); +# and if bare integer numbers appear where a descriptor was expected, +# they are interpreted as Offset(prev_layer, -3) where 'prev_layer' +# is the previous layer in the config file. + +# Also, in any place a raw input/layer/output name can appear, we accept things +# like [-1] meaning the previous input/layer/output's name, or [-2] meaning the +# last-but-one input/layer/output, and so on. +class Descriptor: + def __init__(self, + descriptor_string = None, + prev_names = None): + # self.operator is a string that may be 'Offset', 'Append', + # 'Sum', 'Failover', 'IfDefined', 'Offset', 'Switch', 'Round', + # 'ReplaceIndex'; it also may be None, representing the base-case + # (where it's just a layer name) + + # self.items will be whatever items are + # inside the parentheses, e.g. if this is Sum(foo bar), + # then items will be [d1, d2], where d1 is a Descriptor for + # 'foo' and d1 is a Descriptor for 'bar'. However, there are + # cases where elements of self.items are strings or integers, + # for instance in an expression 'ReplaceIndex(ivector, x, 0)', + # self.items would be [d, 'x', 0], where d is a Descriptor + # for 'ivector'. In the case where self.operator is None (where + # this Descriptor represents just a bare layer name), self. + # items contains the name of the input layer as a string. + self.operator = None + self.items = None + + if descriptor_string != None: + try: + tokens = TokenizeDescriptor(descriptor_string) + pos = 0 + (d, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + # copy members from d. + self.operator = d.operator + self.items = d.items + except Exception as e: + traceback.print_tb(sys.exc_info()[2]) + raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format( + descriptor_string, repr(e))) + + + def str(self): + if self.operator is None: + assert len(self.items) == 1 and isinstance(self.items[0], str) + return self.items[0] + else: + assert isinstance(self.operator, str) + return self.operator + '(' + ', '.join([str(item) for item in self.items]) + ')' + + def __str__(self): + return self.str() + + +# This just checks that seen_item == expected_item, and raises an +# exception if not. +def ExpectToken(expected_item, seen_item, what_parsing): + if seen_item != expected_item: + raise Exception("parsing {0}, expected '{1}' but got '{2}'".format( + what_parsing, expected_item, seen_item)) + +# returns true if 'name' is valid as the name of a line (input, layer or output); +# this is the same as IsValidName() in the nnet3 code. +def IsValidLineName(name): + return isinstance(name, str) and re.match(r'^[a-zA-Z_][-a-zA-Z_0-9.]*', name) != None + +# This function for parsing Descriptors takes an array of tokens as produced +# by TokenizeDescriptor. It parses a descriptor +# starting from position pos >= 0 of the array 'tokens', and +# returns a new position in the array that reflects any tokens consumed while +# parsing the descriptor. +# It returns a pair (d, pos) where d is the newly parsed Descriptor, +# and 'pos' is the new position after consuming the relevant input. 
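+# A rough usage sketch (illustrative only):
+#   tokens = TokenizeDescriptor('Offset(input, -3)')
+#   (d, pos) = ParseNewDescriptor(tokens, 0, prev_names)
+#   # now d.str() == 'Offset(input, -3)' and tokens[pos] == 'end of string'.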
+def ParseNewDescriptor(tokens, pos, + prev_names): + size = len(tokens) + first_token = tokens[pos] + pos += 1 + d = Descriptor() + + # when reading this function, be careful to note the indent level, + # there is an if-statement within an if-statement. + if first_token in [ 'Offset', 'Round', 'ReplaceIndex', 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]: + ExpectToken('(', tokens[pos], first_token + '()') + pos += 1 + d.operator = first_token + # the 1st argument of all these operators is a Descriptor. + (desc, pos) = ParseNewDescriptor(tokens, + pos, prev_names) + d.items = [desc] + + if first_token == 'Offset': + ExpectToken(',', tokens[pos], 'Offset()') + pos += 1 + try: + t_offset = int(tokens[pos]) + pos += 1 + d.items.append(t_offset) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + if tokens[pos] == ')': + return (d, pos + 1) + elif tokens[pos] != ',': + raise Exception("Parsing Offset(), expected ')' or ',', got " + tokens[pos]) + pos += 1 + try: + x_offset = int(tokens[pos]) + pos += 1 + d.items.append(x_offset) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'Offset()') + pos += 1 + elif first_token in [ 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]: + while True: + if tokens[pos] == ')': + # check num-items is correct for some special cases. + if first_token == 'Failover' and len(d.items) != 2: + raise Exception("Parsing Failover(), expected 2 items but got {0}".format(len(d.items))) + if first_token == 'IfDefined' and len(d.items) != 1: + raise Exception("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items))) + pos += 1 + break + elif tokens[pos] == ',': + pos += 1 # consume the comma. + else: + raise Exception("Parsing Append(), expected ')' or ',', got " + tokens[pos]) + + (desc, pos) = ParseNewDescriptor(tokens, + pos, prev_names) + d.items.append(desc) + elif first_token == 'Round': + ExpectToken(',', tokens[pos], 'Round()') + pos += 1 + try: + t_modulus = int(tokens[pos]) + assert t_modulus > 0 + pos += 1 + d.items.append(t_modulus) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'Round()') + pos += 1 + elif first_token == 'ReplaceIndex': + ExpectToken(',', tokens[pos], 'ReplaceIndex()') + pos += 1 + if tokens[pos] in [ 'x', 't' ]: + d.items.append(tokens[pos]) + pos += 1 + else: + raise Exception("Parsing ReplaceIndex(), expected 'x' or 't', got " + + tokens[pos]) + ExpectToken(',', tokens[pos], 'ReplaceIndex()') + pos += 1 + try: + new_value = int(tokens[pos]) + pos += 1 + d.items.append(new_value) + except: + raise Exception("Parsing Offset(), expected integer, got " + tokens[pos]) + ExpectToken(')', tokens[pos], 'ReplaceIndex()') + pos += 1 + else: + raise Exception("code error") + elif first_token in [ 'end of string', '(', ')', ',', '@' ]: + raise Exception("Expected descriptor, got " + first_token) + elif IsValidLineName(first_token) or first_token == '[': + # This section parses either a raw input/layer/output name, e.g. "affine2" + # (which must start with an alphabetic character or underscore), + # or something like [-2], optionally followed by an offset like '@-3'. 
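+        # For example, '[-2]@3' ends up as Offset(<last-but-one layer>, 3), while
+        # a bare name like 'affine2' just becomes a base-case Descriptor.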
+ if first_token == '[': + try: + offset_into_prev_names = int(tokens[pos]) + assert offset_into_prev_names < 0 + pos += 1 + except: + raise Exception("Parse error: after '[', expected negative integer, got '{0}'".format( + tokens[pos])) + ExpectToken(']', tokens[pos], 'Descriptor') + pos += 1 + assert isinstance(prev_names, list) + if -offset_into_prev_names > len(prev_names): + raise Exception("Error: expression [{0}] requested, but there are " + "not enough previous input or layer names to satisfy " + "this.".format(offset_into_prev_names)) + d.operator = None + # below, e.g. prev_names[-2] would give the last-but-one layer. + d.items = [prev_names[offset_into_prev_names]] + else: + # 'first_token' starts with a-z, A-Z or _, treat it + # as the name of a layer or input node. + d.operator = None + d.items = [first_token] + + # If the layer-name or expression like [-2] is followed by '@', then + # we're parsing something like 'affine1@-3' or '[-2]@3'. + if tokens[pos] == '@': + pos += 1 + try: + offset_t = int(tokens[pos]) + pos += 1 + except: + raise Exception("Parse error parsing {0}@{1}".format( + first_token, tokens[pos])) + if offset_t != 0: + inner_d = d + d = Descriptor() + # e.g. foo@3 is equivalent to 'Offset(foo, 3)'. + d.operator = 'Offset' + d.items = [ inner_d, offset_t ] + else: + # the last possible case is that 'first_token' is just an integer i, + # which can appear in things like Append(-3, 0, 3). + # See if the token is an integer. + # In this case, it's interpreted as the name of previous layer + # (with that time offset applied). + try: + offset_t = int(first_token) + except: + raise Exception("Parsing descriptor, expected descriptor but got " + + first_token) + assert isinstance(prev_names, list) + if len(prev_names) < 1: + raise Exception("Parsing descriptor, could not interpret '{0}' because " + "there is no previous layer".format(first_token)) + d.operator = None + # the layer name is the name of the most recent layer. + d.items = [prev_names[-1]] + if offset_t != 0: + inner_d = d + d = Descriptor() + d.operator = 'Offset' + d.items = [ inner_d, offset_t ] + return (d, pos) + + + + +# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. +# Note: for convenience in parsing, we add the token 'end-of-string' to this +# list. +# The argument 'prev_names' (for the names of previous layers and input and +# output nodes) is needed to process expressions like [-1] meaning the most +# recent layer, or [-2] meaning the last layer but one. +def TokenizeDescriptor(descriptor_string, + prev_names = None): + # split on '(', ')', ',', '@', and space. + # Note: the parenthesis () in the regexp causes it to output + # the stuff inside the () as if it were a field, which is + # why we keep characters like '(' and ')' as tokens. + fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) + ans = [] + for f in fields: + # don't include fields that are space, or are empty. + if re.match(r'^\s*$', f) is None: + ans.append(f) + + ans.append('end of string') + return ans + + +# This function parses a line in a config file, something like +# affine-layer name=affine1 input=Append(-3, 0, 3) +# and returns a pair, +# (first_token, fields), as (string, dict) e.g. in this case +# ('affine-layer', {'name':'affine1', 'input':'Append(-3, 0, 3)" +# Note: spaces are allowed in the field names but = signs are +# disallowed, which is why it's possible to parse them. +# This function also removes comments (anything after '#'). 
+# As a special case, this function will return NULL if the line +# is empty after removing spaces. +def ParseConfigLine(orig_config_line): + # Remove comments. + # note: splitting on '#' will always give at least one field... python + # treats splitting on space as a special case that may give zero fields. + config_line = orig_config_line.split('#')[0] + # Now split on space; later we may splice things back together. + fields=config_line.split() + if len(fields) == 0: + return None # Line was only whitespace after removing comments. + first_token = fields[0] + # if first_token does not look like 'foo-bar' or 'foo-bar2', then die. + if re.match('^[a-z][-a-z0-9]+$', first_token) is None: + raise Exception("Error parsing config line (first field doesn't look right): {0}".format( + orig_config_line)) + # get rid of the first field which we put in 'first_token'. + fields = fields[1:] + + rest_of_line = ' '.join(fields) + + # suppose rest_of_line is: 'input=Append(foo, bar) foo=bar' + # then after the below we'll get + # fields = ['', 'input', 'Append(foo, bar)', 'foo', 'bar'] + fields = re.split(r'\s*([-a-zA-Z0-9_]*)=', rest_of_line) + if not (fields[0] == '' and len(fields) % 2 == 1): + raise Exception("Could not parse config line: " + orig_config_line) + fields = fields[1:] + num_variables = len(fields) / 2 + ans_dict = dict() + for i in range(num_variables): + var_name = fields[i * 2] + var_value = fields[i * 2 + 1] + if re.match(r'[a-zA-Z_]', var_name) is None: + raise Exception("Expected variable name '{0}' to start with alphabetic character or _, " + "in config line {1}".format(var_name, orig_config_line)) + if var_name in ans_dict: + raise Exception("Config line has multiply defined variable {0}: {1}".format( + var_name, orig_config_line)) + ans_dict[var_name] = var_value + return (first_token, ans_dict) + + +# Reads a config file and returns a list of objects, where each object +# represents one line of the file. +def ReadConfigFile(filename): + try: + f = open(filename, "r") + except Exception as e: + raise Exception("Error reading config file {0}: {1}".format( + filename, repr(e))) + ans = [] + prev_names = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(line) + if x is None: + continue # blank line + (first_token, key_to_value) = x + layer_object = ConfigLineToObject(first_token, key_to_value, prev_names) + ans.append(layer_object) + prev_names.append(layer_object.Name()) + +# turns a config line that has been parsed into +# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, +# into an object representing that line of the config file. +# 'prev_names' is a list of the names of preceding lines of the +# config file. 
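+# For example, ('input', {'name':'ivector', 'dim':'100'}) would become an
+# XconfigInputLine object; at this stage the dispatch below is only a stub.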
+def ConfigLineToObject(first_token, key_to_value, prev_names): + pass + + +def TestLibrary(): + TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' + assert TokenizeTest("hi") == ['hi'] + assert TokenizeTest("hi there") == ['hi', 'there'] + assert TokenizeTest("hi,there") == ['hi', ',', 'there'] + assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] + assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] + assert TokenizeTest("[-1]@2") == ['[', '-1', ']', '@', '2' ] + + assert Descriptor('foo').str() == 'foo' + assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' + assert Descriptor('Sum(Offset(foo,1),Offset(foo,0))').str() == 'Sum(Offset(foo, 1), Offset(foo, 0))' + for x in [ 'Append(foo, Sum(bar, Offset(baz, 1)))', 'Failover(foo, Offset(bar, -1))', + 'IfDefined(Round(baz, 3))', 'Switch(foo1, Offset(foo2, 2), Offset(foo3, 3))', + 'IfDefined(ReplaceIndex(ivector, t, 0))', 'ReplaceIndex(foo, x, 0)' ]: + if not Descriptor(x).str() == x: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), x)) + + prev_names = ['last_but_one_layer', 'prev_layer'] + for x, y in [ ('Sum(foo,bar)', 'Sum(foo, bar)'), + ('Sum(foo1,bar-3_4)', 'Sum(foo1, bar-3_4)'), + ('Append(input@-3, input@0, input@3)', + 'Append(Offset(input, -3), input, Offset(input, 3))'), + ('Append(-3,0,3)', + 'Append(Offset(prev_layer, -3), prev_layer, Offset(prev_layer, 3))'), + ('[-1]', 'prev_layer'), + ('[-2]', 'last_but_one_layer'), + ('[-2]@3', + 'Offset(last_but_one_layer, 3)') ]: + if not Descriptor(x, prev_names).str() == y: + print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) + + print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) + print(ParseConfigLine('affine-layer')) From 1553399a897eb8f2d3555b56d1f5fad658eb5cd1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 27 Oct 2016 00:29:47 -0400 Subject: [PATCH 02/12] Change how [-1] and the like are parsed in xconfig_lib.py --- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 80 ++++++++++++---------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index 437be386c90..d97900969b2 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -127,7 +127,7 @@ def __init__(self, if descriptor_string != None: try: - tokens = TokenizeDescriptor(descriptor_string) + tokens = TokenizeDescriptor(descriptor_string, prev_names) pos = 0 (d, pos) = ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker @@ -175,8 +175,10 @@ def IsValidLineName(name): # parsing the descriptor. # It returns a pair (d, pos) where d is the newly parsed Descriptor, # and 'pos' is the new position after consuming the relevant input. -def ParseNewDescriptor(tokens, pos, - prev_names): +# 'prev_names' is so that we can find the most recent layer name for +# expressions like Append(-3, 0, 3) which is shorthand for the most recent +# layer spliced at those time offsets. +def ParseNewDescriptor(tokens, pos, prev_names): size = len(tokens) first_token = tokens[pos] pos += 1 @@ -189,8 +191,7 @@ def ParseNewDescriptor(tokens, pos, pos += 1 d.operator = first_token # the 1st argument of all these operators is a Descriptor. 
- (desc, pos) = ParseNewDescriptor(tokens, - pos, prev_names) + (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names) d.items = [desc] if first_token == 'Offset': @@ -230,8 +231,7 @@ def ParseNewDescriptor(tokens, pos, else: raise Exception("Parsing Append(), expected ')' or ',', got " + tokens[pos]) - (desc, pos) = ParseNewDescriptor(tokens, - pos, prev_names) + (desc, pos) = ParseNewDescriptor(tokens, pos, prev_names) d.items.append(desc) elif first_token == 'Round': ExpectToken(',', tokens[pos], 'Round()') @@ -269,35 +269,16 @@ def ParseNewDescriptor(tokens, pos, elif first_token in [ 'end of string', '(', ')', ',', '@' ]: raise Exception("Expected descriptor, got " + first_token) elif IsValidLineName(first_token) or first_token == '[': - # This section parses either a raw input/layer/output name, e.g. "affine2" + # This section parses a raw input/layer/output name, e.g. "affine2" # (which must start with an alphabetic character or underscore), - # or something like [-2], optionally followed by an offset like '@-3'. - if first_token == '[': - try: - offset_into_prev_names = int(tokens[pos]) - assert offset_into_prev_names < 0 - pos += 1 - except: - raise Exception("Parse error: after '[', expected negative integer, got '{0}'".format( - tokens[pos])) - ExpectToken(']', tokens[pos], 'Descriptor') - pos += 1 - assert isinstance(prev_names, list) - if -offset_into_prev_names > len(prev_names): - raise Exception("Error: expression [{0}] requested, but there are " - "not enough previous input or layer names to satisfy " - "this.".format(offset_into_prev_names)) - d.operator = None - # below, e.g. prev_names[-2] would give the last-but-one layer. - d.items = [prev_names[offset_into_prev_names]] - else: - # 'first_token' starts with a-z, A-Z or _, treat it - # as the name of a layer or input node. - d.operator = None - d.items = [first_token] + # optionally followed by an offset like '@-3'. - # If the layer-name or expression like [-2] is followed by '@', then - # we're parsing something like 'affine1@-3' or '[-2]@3'. + d.operator = None + d.items = [first_token] + + # If the layer-name o is followed by '@', then + # we're parsing something like 'affine1@-3' which + # is syntactic sugar for 'Offset(affine1, 3)'. if tokens[pos] == '@': pos += 1 try: @@ -346,6 +327,7 @@ def ParseNewDescriptor(tokens, pos, # The argument 'prev_names' (for the names of previous layers and input and # output nodes) is needed to process expressions like [-1] meaning the most # recent layer, or [-2] meaning the last layer but one. +# The default None for prev_names is only supplied for testing purposes. def TokenizeDescriptor(descriptor_string, prev_names = None): # split on '(', ')', ',', '@', and space. @@ -354,9 +336,33 @@ def TokenizeDescriptor(descriptor_string, # why we keep characters like '(' and ')' as tokens. fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) ans = [] - for f in fields: + i = 0 + while i < len(fields): + f = fields[i] + i = i + 1 # don't include fields that are space, or are empty. 
- if re.match(r'^\s*$', f) is None: + if re.match(r'^\s*$', f) is not None: + continue + if f == '[': + if i + 2 >= len(fields): + raise Exception("Error tokenizing string '{0}': '[' found too close " + "to the end of the descriptor.".format(descriptor_string)) + if fields[i+1] != ']': + raise Exception("Error tokenizing string '{0}': expected ']', got '{1}'".format( + descriptor_string, fields[i+1])) + assert isinstance(prev_names, list) + try: + offset = int(fields[i]) + assert offset < 0 and -offset <= len(prev_names) + i += 2 # consume the int and the ']'. + except: + raise Exception("Error tokenizing string '{0}': expression [{1}] has an " + "invalid or out of range offset.".format(descriptor_string, fields[i])) + this_field = prev_names[offset] + assert IsValidLineName(this_field) # should already have been + # checked, so assert. + ans.append(this_field) + else: ans.append(f) ans.append('end of string') @@ -452,7 +458,7 @@ def TestLibrary(): assert TokenizeTest("hi,there") == ['hi', ',', 'there'] assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] - assert TokenizeTest("[-1]@2") == ['[', '-1', ']', '@', '2' ] + assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] assert Descriptor('foo').str() == 'foo' assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' From 6a8ecdbe83b610304fb728d80dbdb908f48ee8a2 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 30 Oct 2016 16:15:18 -0400 Subject: [PATCH 03/12] Adding some temporary work on xconfigs (will not work right now) --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 206 ++++++++++++++++++ egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 123 ++++++++--- 2 files changed, 296 insertions(+), 33 deletions(-) create mode 100644 egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py new file mode 100644 index 00000000000..44541588f7d --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -0,0 +1,206 @@ +from __future__ import print_function +import subprocess +import logging +import math +import re +import sys +import traceback +import time +import argparse +from xconfig_lib import * + +# This class represents a line that starts with 'input', e.g. +# 'input name=ivector dim=100', or 'input name=input dim=40' +class XconfigInputLine: + # Constructor. + # first_token must be the string 'input'. + # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. + # 'prev_names' is a list of the names of preceding lines of the + # config file; it's not used here but is part of the common + # interface for xconfig input line constructors. + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'input' + if not 'name' in key_to_value: + raise Exception("Config line for input does not specify name.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Name '{0}' is not a valid node name.".format(self.name)) + if not 'dim' in key_to_value: + raise Exception("Config line for input does not specify dimension.") + try: + self.dim = int(key_to_value['dim']) + assert self.dim > 0 + if len(key_to_value) > 2: + raise Exception("Unused name=value pairs in config line") + except: + raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) + + + # This returns the name of the layer, e.g. 'input' or 'ivector'. 
+ def Name(): + return self.name + + # This returns the component-node name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.renorm'. + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputName(qualifier = None): + assert qualifier == None + return self.name + + # The dimension that this layer outputs. + # OutputDim(). + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputDim(qualifier = None): + assert qualifier == None + return self.dim + + # Returns a list of all qualifiers (meaning auxiliary outputs) that this + # layer supports (these are either 'None' for the regular output, or a + # string such as 'projection' or something like that, for auxiliary outputs. + def Qualifiers(): + return [ None ] + + # This function writes the 'full' config format, as would be read + # by the C++ programs. It writes the config lines to 'file'. + # 'all_layers' is a vector of objects (of type XConfigInputLine or + # inheriting from XconfigLayerBase), which is used to get + # the component names and + def GetFullConfig(self, file, all_layers): + print("input-node name={0} dim={0}".format(self.name, self.dim) + + def str(self): + return 'input name={0} dim={1}'.format(self.name, self.dim) + + def __str__(self): + return self.str() + + + +# A base-class for classes representing layers of xconfig files (but not input +# nodes). This handles parsing the Descriptors and other common tasks. +class XconfigLayerBase(object): + # Constructor. + # first_token is the first token on the xconfig line, e.g. 'affine-layer'.f + # key_to_value is a dict like: + # { 'name':'affine1', 'input':'Append(0, 1, 2, ReplaceIndex(ivector, t, 0))', 'dim=1024' }. + # The only required and 'special' values that are dealt with directly at this level, are + # 'name' and 'input'. + # The rest are put in self.config and are dealt with by the child classes' init functions. + # prev_names is an array of the names (xxx in 'name=xxx') of previous + # lines of the config file. + def __init__(self, first_token, key_to_value, prev_names = None): + self.layer_type = first_token + if not 'name' in key_to_value + raise Exception("Expected 'name' to be specified.") + self.name = key_to_value['name'] + if not IsValidLineName(self.name): + raise Exception("Invalid value: name={0}".format(key_to_value['name'])) + + if not 'input' in key_to_value + raise Exception("Expected 'name' to be specified.") + input_descriptor_str = key_to_value[input] + tokens = TokenizeDescriptor(input_descriptor_str, prev_names) + pos = 0 + (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + # the following, which should be overridden in the child class, sets + # default config parameters in self.config. + self.SetDefaultConfigs() + self._OverrideConfigs() + # the following, which should be overridden in the child class, checks + # that the config parameters that have been set are reasonable. + self.CheckConfigs() + + + # We broke this code out of __init__ for clarity. + def _OverrideConfigs(key_to_value): + # the child-class constructor will deal with the configuration values + # in a more specific way. 
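+        # Any key other than 'name' and 'input' must already have a default set
+        # by SetDefaultConfigs(); unknown keys are rejected below.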
+ for key,value in key_to_value.items(): + if key != 'name' and key != 'input': + if not key in self.config: + raise Exception("Configuration value {0}={1} was not expected in " + "layer of type {2}".format(key, value, self.layer_type)) + if isinstance(value, bool): + self.config[key] = ConvertValueToType(key, type(self.config[key]), + value) + + def GetDefaultConfigs(): + raise Exception("Child classes must override GetDefaultConfigs().") + + + # child classes may override this but do not have to. + def CheckConfigs(): + pass + + + # Returns a list of all qualifiers (meaning auxiliary outputs) that this + # layer supports (these are either 'None' for the regular output, or a + # string such as 'projection' or something like that, for auxiliary outputs. + # This is a default implementation of the function. + def Qualifiers(): + return [ None ] + + # This returns the component-node name of the principal output of the layer. For + # the input layer this is the same as the name. For an affine layer + # 'affine1' it might be e.g. 'affine1.renorm'. + # The 'qualifier' parameter is for compatibility with other layer + # types, which support auxiliary outputs. + def OutputName(qualifier = None): + raise Exception("Child classes must override OutputName()") + + # The dimension that this layer outputs. + # The 'qualifier' parameter is to support + # types, which support auxiliary outputs. + def OutputDim(qualifier = None): + raise Exception("Child classes must override OutputDim()") + + + # This function writes the 'full' config format, as would be read + # by the C++ programs. It writes the config lines to 'file'. + # 'all_layers' is a vector of objects (of type XConfigInputLine or + # inheriting from XconfigLayerBase), which is used to get + # the component names and dimensions at the input. + def GetFullConfig(self, file, all_layers): + raise Exception("Child classes must override GetFullConfig()") + + # Name() returns the name of this layer, e.g. 'affine1'. It does not + # necessarily correspond to a component name. + def Name(): + return self.name + + def str(self): + ans = '{0} name={1}'.format(self.layer_type, self.name) + ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) + for key in sorted(self.config.keys())]) + return ans + + def __str__(self): + return self.str() + + + +# Uses ParseConfigLine() to turn a config line that has been parsed into +# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, +# and then turns this into an object representing that line of the config file. +# 'prev_names' is a list of the names of preceding lines of the +# config file. +def ConfigLineToObject(config_line, prev_names = None): + (first_token, key_to_value) = ParseConfigLine(config_line) + + if first_token == 'input': + return XconfigInputLine(key_to_value) + + +def TestLayers(): + # for some config lines that should be printed the same way as they + # are read, check that this is the case. 
+ for x in [ 'input name=input dim=30' ]: + assert str(ConfigLineToObject(x, [])) == x diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index d97900969b2..f38d28c72a6 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -39,12 +39,42 @@ def __call__(self, parser, namespace, values, option_string=None): else: setattr(namespace, self.dest, values) +# This function, used in converting string values in config lines to +# configuration values in self.config in layers, attempts to +# convert 'string_value' to an instance dest_type (which is of type Type) +# 'key' is only needed for printing errors. +class ConvertValueToType(key, dest_type, string_value): + if dest_type == type(bool()): + if string_value == "True" or string_value == "true": + return True + elif string_value == "False" or string_value == "false": + return False + else: + raise Exception("Invalid configuration value {0}={1} (expected bool)".format( + key, string_value)) + elif dest_type == type(int()): + try: + return int(string_value) + except: + raise Exception("Invalid configuration value {0}={1} (expected int)".format( + key, string_value) + elif dest_type == type(float()): + try: + return float(string_value) + except: + raise Exception("Invalid configuration value {0}={1} (expected int)".format( + key, string_value) + elif dest_type == type(str()): + return sting_value + # This class represents a line that starts with 'input', e.g. # 'input name=ivector dim=100', or 'input name=input dim=40' class XconfigInputLine: # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. - def __init__(self, key_to_value): + # prev_layer_names is not used here but other constructors for lines + # use it, so we must too. + def __init__(self, key_to_value, prev_layer_names = None): if not 'name' in key_to_value: raise Exception("Config line for input does not specify name.") self.name = key_to_value['name'] @@ -73,6 +103,13 @@ def OutputName(): def OutputDim(): return self.dim + def str(self): + return 'input name={0} dim={1}'.format(self.name, self.dim) + + def __str__(self): + return self.str() + + # A base-class for classes representing lines of xconfig files. # This handles the @@ -319,37 +356,29 @@ def ParseNewDescriptor(tokens, pos, prev_names): return (d, pos) - - -# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. -# Note: for convenience in parsing, we add the token 'end-of-string' to this -# list. -# The argument 'prev_names' (for the names of previous layers and input and -# output nodes) is needed to process expressions like [-1] meaning the most -# recent layer, or [-2] meaning the last layer but one. -# The default None for prev_names is only supplied for testing purposes. -def TokenizeDescriptor(descriptor_string, - prev_names = None): - # split on '(', ')', ',', '@', and space. - # Note: the parenthesis () in the regexp causes it to output - # the stuff inside the () as if it were a field, which is - # why we keep characters like '(' and ')' as tokens. - fields = re.split(r'(\(|\)|@|,|\[|\]|\s)\s*', descriptor_string) - ans = [] +# This function takes a string 'descriptor_string' which might +# look like 'Append([-1], [-2], input)', and a list of previous layer +# names like prev_names = ['foo', 'bar', 'baz'], and replaces +# the integers in brackets with the previous layers. 
-1 means +# the most recent previous layer ('baz' in this case), -2 +# means the last layer but one ('bar' in this case), and so on. +# It will throw an exception if the number is out of range. +# If there are no such expressions in the string, it's OK if +# prev_names == None (this is useful for testing). +def ReplaceBracketExpressionsInDescriptor(descriptor_string, + prev_names = None): + fields = re.split(r'(\[|\])\s*', descriptor_string) + out_fields = [] i = 0 while i < len(fields): f = fields[i] - i = i + 1 - # don't include fields that are space, or are empty. - if re.match(r'^\s*$', f) is not None: - continue - if f == '[': + i += 1 + if f == ']': + raise Exception("Unmatched ']' in descriptor") + elif f == '[': if i + 2 >= len(fields): raise Exception("Error tokenizing string '{0}': '[' found too close " "to the end of the descriptor.".format(descriptor_string)) - if fields[i+1] != ']': - raise Exception("Error tokenizing string '{0}': expected ']', got '{1}'".format( - descriptor_string, fields[i+1])) assert isinstance(prev_names, list) try: offset = int(fields[i]) @@ -359,10 +388,33 @@ def TokenizeDescriptor(descriptor_string, raise Exception("Error tokenizing string '{0}': expression [{1}] has an " "invalid or out of range offset.".format(descriptor_string, fields[i])) this_field = prev_names[offset] - assert IsValidLineName(this_field) # should already have been - # checked, so assert. - ans.append(this_field) + out_fields.append(this_field) else: + out_fields.append(f) + return ''.join(out_fields) + + + +# tokenizes 'descriptor_string' into the tokens that may be part of Descriptors. +# Note: for convenience in parsing, we add the token 'end-of-string' to this +# list. +# The argument 'prev_names' (for the names of previous layers and input and +# output nodes) is needed to process expressions like [-1] meaning the most +# recent layer, or [-2] meaning the last layer but one. +# The default None for prev_names is only supplied for testing purposes. +def TokenizeDescriptor(descriptor_string, + prev_names = None): + # split on '(', ')', ',', '@', and space. Note: the parenthesis () in the + # regexp causes it to output the stuff inside the () as if it were a field, + # which is how the call to re.split() keeps characters like '(' and ')' as + # tokens. + fields = re.split(r'(\(|\)|@|,|\s)\s*', + ReplaceBracketExpressionsInDescriptor(descriptor_string, + prev_names)) + ans = [] + for f in fields: + # don't include fields that are space, or are empty. + if re.match(r'^\s*$', f) is None: ans.append(f) ans.append('end of string') @@ -442,13 +494,16 @@ def ReadConfigFile(filename): ans.append(layer_object) prev_names.append(layer_object.Name()) -# turns a config line that has been parsed into +# Uses ParseConfigLine() to turn a config line that has been parsed into # a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, -# into an object representing that line of the config file. +# and then turns this into an object representing that line of the config file. # 'prev_names' is a list of the names of preceding lines of the # config file. 
-def ConfigLineToObject(first_token, key_to_value, prev_names): - pass +def ConfigLineToObject(config_line, prev_names = None): + (first_token, key_to_value) = ParseConfigLine(config_line) + + if first_token == 'input': + return XconfigInputLine(key_to_value) def TestLibrary(): @@ -459,6 +514,7 @@ def TestLibrary(): assert TokenizeTest("hi@-1,there") == ['hi', '@', '-1', ',', 'there'] assert TokenizeTest("hi(there)") == ['hi', '(', 'there', ')'] assert TokenizeDescriptor("[-1]@2", ['foo', 'bar'])[:-1] == ['bar', '@', '2' ] + assert TokenizeDescriptor("[-2].special@2", ['foo', 'bar'])[:-1] == ['foo.special', '@', '2' ] assert Descriptor('foo').str() == 'foo' assert Descriptor('Sum(foo,bar)').str() == 'Sum(foo, bar)' @@ -483,6 +539,7 @@ def TestLibrary(): if not Descriptor(x, prev_names).str() == y: print("Error: '{0}' != '{1}'".format(Descriptor(x).str(), y)) + print(ParseConfigLine('affine-layer input=Append(foo, bar) foo=bar')) print(ParseConfigLine('affine-layer1 input=Append(foo, bar) foo=bar')) From 9adc26ce2a8f3ff3f681e8ea5a5ecae7e09a545b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 1 Nov 2016 23:26:44 -0400 Subject: [PATCH 04/12] Some partial work --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 337 +++++++++----- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 127 ++---- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 421 ++++++++++++++++++ 3 files changed, 682 insertions(+), 203 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet3/xconfig_to_configs.py diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 44541588f7d..97f8c4846b6 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -9,78 +9,94 @@ import argparse from xconfig_lib import * -# This class represents a line that starts with 'input', e.g. -# 'input name=ivector dim=100', or 'input name=input dim=40' -class XconfigInputLine: - # Constructor. - # first_token must be the string 'input'. - # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. - # 'prev_names' is a list of the names of preceding lines of the - # config file; it's not used here but is part of the common - # interface for xconfig input line constructors. - def __init__(self, first_token, key_to_value, prev_names = None): - assert first_token == 'input' - if not 'name' in key_to_value: - raise Exception("Config line for input does not specify name.") - self.name = key_to_value['name'] - if not IsValidLineName(self.name): - raise Exception("Name '{0}' is not a valid node name.".format(self.name)) - if not 'dim' in key_to_value: - raise Exception("Config line for input does not specify dimension.") - try: - self.dim = int(key_to_value['dim']) - assert self.dim > 0 - if len(key_to_value) > 2: - raise Exception("Unused name=value pairs in config line") - except: - raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) - - - # This returns the name of the layer, e.g. 'input' or 'ivector'. - def Name(): - return self.name - - # This returns the component-node name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.renorm'. - # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. - def OutputName(qualifier = None): - assert qualifier == None - return self.name - - # The dimension that this layer outputs. - # OutputDim(). 
- # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. - def OutputDim(qualifier = None): - assert qualifier == None - return self.dim - - # Returns a list of all qualifiers (meaning auxiliary outputs) that this - # layer supports (these are either 'None' for the regular output, or a - # string such as 'projection' or something like that, for auxiliary outputs. - def Qualifiers(): - return [ None ] - - # This function writes the 'full' config format, as would be read - # by the C++ programs. It writes the config lines to 'file'. - # 'all_layers' is a vector of objects (of type XConfigInputLine or - # inheriting from XconfigLayerBase), which is used to get - # the component names and - def GetFullConfig(self, file, all_layers): - print("input-node name={0} dim={0}".format(self.name, self.dim) - - def str(self): - return 'input name={0} dim={1}'.format(self.name, self.dim) - - def __str__(self): - return self.str() - - - -# A base-class for classes representing layers of xconfig files (but not input -# nodes). This handles parsing the Descriptors and other common tasks. +# Given a list of objects of type XconfigLayerBase ('all_layers'), +# including at least the layers preceding 'current_layer' (and maybe +# more layers), return the names of layers preceding 'current_layer' +# This will be used in parsing expressions like [-1] in descriptors +# (which is an alias for the previous layer). +def GetPrevNames(all_layers, current_layer): + assert current_layer in all_layers + prev_names = [] + for layer in all_layers: + if layer is current_layer: + break + prev_names.append(layer.Name()) + return prev_names + +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetDimFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise Exception("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputDim(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise Exception("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise Exception("No such layer: '{0}'".format(layer_name)) + + +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple +# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 
'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetStringFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise Exception("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputName(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise Exception("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise Exception("No such layer: '{0}'".format(layer_name)) + + + +# A base-class for classes representing layers of xconfig files. +# This mainly just sets self.layer_type, self.name and self.config, class XconfigLayerBase(object): # Constructor. # first_token is the first token on the xconfig line, e.g. 'affine-layer'.f @@ -91,36 +107,28 @@ class XconfigLayerBase(object): # The rest are put in self.config and are dealt with by the child classes' init functions. # prev_names is an array of the names (xxx in 'name=xxx') of previous # lines of the config file. + def __init__(self, first_token, key_to_value, prev_names = None): self.layer_type = first_token - if not 'name' in key_to_value + if not 'name' in key_to_value: raise Exception("Expected 'name' to be specified.") self.name = key_to_value['name'] if not IsValidLineName(self.name): raise Exception("Invalid value: name={0}".format(key_to_value['name'])) - if not 'input' in key_to_value - raise Exception("Expected 'name' to be specified.") - input_descriptor_str = key_to_value[input] - tokens = TokenizeDescriptor(input_descriptor_str, prev_names) - pos = 0 - (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) - # note: 'pos' should point to the 'end of string' marker - # that terminates 'tokens'. - if pos != len(tokens) - 1: - raise Exception("Parsing Descriptor, saw junk at end: " + - ' '.join(tokens[pos:-1])) # the following, which should be overridden in the child class, sets # default config parameters in self.config. self.SetDefaultConfigs() - self._OverrideConfigs() + # The following is not to be reimplemented in child classes; + # sets the config files to those specified by the user. + self._SetConfigs(key_to_value) # the following, which should be overridden in the child class, checks # that the config parameters that have been set are reasonable. self.CheckConfigs() # We broke this code out of __init__ for clarity. - def _OverrideConfigs(key_to_value): + def _SetConfigs(self, key_to_value): # the child-class constructor will deal with the configuration values # in a more specific way. 
for key,value in key_to_value.items(): @@ -128,63 +136,146 @@ def _OverrideConfigs(key_to_value): if not key in self.config: raise Exception("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) - if isinstance(value, bool): - self.config[key] = ConvertValueToType(key, type(self.config[key]), - value) + self.config[key] = ConvertValueToType(key, type(self.config[key]), value) - def GetDefaultConfigs(): - raise Exception("Child classes must override GetDefaultConfigs().") + # This function converts 'this' to a string which could be printed to an + # xconfig file; in xconfig_to_configs.py we actually expand all the lines to + # strings and write it as xconfig.expanded as a reference (so users can + # see any defaults). + def str(self): + ans = '{0} name={1}'.format(self.layer_type, self.name) + ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) + for key in sorted(self.config.keys())]) + return ans - # child classes may override this but do not have to. + def __str__(self): + return self.str() + + # This function, which is a convenience function intended to be called from + # child classes, converts a string representing a descriptor + # ('descriptor_string') into an object of type Descriptor, and returns it. + # It needs 'self' and 'all_layers' (where 'all_layers' is a list of objects + # of type XconfigLayerBase) so that it can work out a list of the names of + # other layers, and get dimensions from them. + def ConvertToDescriptor(self, descriptor_string, all_layers): + prev_names = GetPrevNames(all_layers, self) + tokens = TokenizeDescriptor(descriptor_string, prev_names) + pos = 0 + (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + # note: 'pos' should point to the 'end of string' marker + # that terminates 'tokens'. + if pos != len(tokens) - 1: + raise Exception("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) + + # Returns the dimension of a Descriptor object. + # This is a convenience function provided for use in child classes; + def GetDimForDescriptor(self, descriptor, all_layers): + layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) + return descriptor.Dim(layer_to_dim_func) + + # Returns the 'final' string form of a Descriptor object, as could be used + # in config files. + # This is a convenience function provided for use in child classes; + def GetStringForDescriptor(self, descriptor, all_layers): + layer_to_string_func = lambda name: GetStringFromLayerName(all_layers, self, name) + return descriptor.ConfigString(layer_to_string_func) + + # Name() returns the name of this layer, e.g. 'affine1'. It does not + # necessarily correspond to a component name. + def Name(): + return self.name + + ###### Functions that should be overridden by the child class: ##### + + # child classes should override this. + def SetDefaultConfigs(): + raise Exception("Child classes must override SetDefaultConfigs().") + + # child classes should override this. def CheckConfigs(): pass - # Returns a list of all qualifiers (meaning auxiliary outputs) that this - # layer supports (these are either 'None' for the regular output, or a - # string such as 'projection' or something like that, for auxiliary outputs. - # This is a default implementation of the function. + # layer supports. These are either 'None' for the regular output, or a + # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that + # the layer might provide. 
Most layer types will not need to override this. def Qualifiers(): return [ None ] - # This returns the component-node name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.renorm'. - # The 'qualifier' parameter is for compatibility with other layer - # types, which support auxiliary outputs. + # Called with qualifier == None, this returns the component-node name of the + # principal output of the layer (or if you prefer, the text form of a + # descriptor that gives you such an output; such as Append(some_node, + # some_other_node)). + # The 'qualifier' argument is a text value that is designed for extensions + # to layers that have additional auxiliary outputs. For example, to implement + # a highway LSTM you need the memory-cell of a layer, so you might allow + # qualifier='memory_cell' for such a layer type, and it would return the + # component node or a suitable Descriptor: something like 'lstm3.c_t' def OutputName(qualifier = None): raise Exception("Child classes must override OutputName()") - # The dimension that this layer outputs. - # The 'qualifier' parameter is to support - # types, which support auxiliary outputs. + # The dimension that this layer outputs. The 'qualifier' parameter is for + # layer types which support auxiliary outputs. def OutputDim(qualifier = None): raise Exception("Child classes must override OutputDim()") - - # This function writes the 'full' config format, as would be read - # by the C++ programs. It writes the config lines to 'file'. - # 'all_layers' is a vector of objects (of type XConfigInputLine or - # inheriting from XconfigLayerBase), which is used to get - # the component names and dimensions at the input. - def GetFullConfig(self, file, all_layers): + # This function returns lines destined for the 'full' config format, as + # would be read by the C++ programs. + # Since the program xconfig_to_configs.py writes several config files, this + # function returns a list of pairs of the form (config_file_basename, line), + # e.g. something like + # [ ('init', 'input-node name=input dim=40'), + # ('ref', 'input-node name=input dim=40') ] + # which would be written to config_dir/init.config and config_dir/ref.config. + # + # 'all_layers' is a vector of objects inheriting from XconfigLayerBase, + # which is used to get the component names and dimensions at the input. + def GetFullConfig(self, all_layers): raise Exception("Child classes must override GetFullConfig()") - # Name() returns the name of this layer, e.g. 'affine1'. It does not - # necessarily correspond to a component name. - def Name(): - return self.name - def str(self): - ans = '{0} name={1}'.format(self.layer_type, self.name) - ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) - for key in sorted(self.config.keys())]) - return ans +# This class is for lines like +# 'input name=input dim=40' +# or +# 'input name=ivector dim=100' +# in the config file. 
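+# Its main job is to emit the corresponding 'input-node name=... dim=...' lines
+# into the generated config files via GetFullConfig().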
+class XconfigInputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'input' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) - def __str__(self): - return self.str() + def SetDefaultConfigs(self): + self.config = { 'dim':-1 } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise Exception("Dimension of input-layer '{0}' is not set".format(self.name)) + + def OutputName(qualifier = None): + assert qualifier is None + return self.name + + def OutputDim(qualifier = None): + assert qualifier is None + return self.config['dim'] + + def GetFullConfig(self, all_layers): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + for config_name in [ 'init', 'ref', 'full' ]: + ans.append( (config_name, + 'input-node name={0} dim={1}'.format(self.name, + self.config['dim']))) + return ans # Uses ParseConfigLine() to turn a config line that has been parsed into @@ -196,7 +287,7 @@ def ConfigLineToObject(config_line, prev_names = None): (first_token, key_to_value) = ParseConfigLine(config_line) if first_token == 'input': - return XconfigInputLine(key_to_value) + return XconfigInputLayer(first_token, key_to_value) def TestLayers(): diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index f38d28c72a6..17a25f9fbb4 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -8,14 +8,6 @@ import time import argparse -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -handler.setLevel(logging.INFO) -formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)s - %(funcName)s - %(levelname)s ] %(message)s') -handler.setFormatter(formatter) -logger.addHandler(handler) - class StrToBoolAction(argparse.Action): """ A custom action to convert bools from shell format i.e., true/false @@ -43,7 +35,7 @@ def __call__(self, parser, namespace, values, option_string=None): # configuration values in self.config in layers, attempts to # convert 'string_value' to an instance dest_type (which is of type Type) # 'key' is only needed for printing errors. -class ConvertValueToType(key, dest_type, string_value): +def ConvertValueToType(key, dest_type, string_value): if dest_type == type(bool()): if string_value == "True" or string_value == "true": return True @@ -57,75 +49,17 @@ class ConvertValueToType(key, dest_type, string_value): return int(string_value) except: raise Exception("Invalid configuration value {0}={1} (expected int)".format( - key, string_value) + key, string_value)) elif dest_type == type(float()): try: return float(string_value) except: raise Exception("Invalid configuration value {0}={1} (expected int)".format( - key, string_value) + key, string_value)) elif dest_type == type(str()): return sting_value -# This class represents a line that starts with 'input', e.g. -# 'input name=ivector dim=100', or 'input name=input dim=40' -class XconfigInputLine: - # key_to_value is a dict like { 'name':'ivector', 'dim':'100' }. 
- # prev_layer_names is not used here but other constructors for lines - # use it, so we must too. - def __init__(self, key_to_value, prev_layer_names = None): - if not 'name' in key_to_value: - raise Exception("Config line for input does not specify name.") - self.name = key_to_value['name'] - if not IsValidLineName(self.name): - raise Exception("Name '{0}' is not a valid node name.".format(self.name)) - if not 'dim' in key_to_value: - raise Exception("Config line for input does not specify dimension.") - try: - self.dim = int(key_to_value['dim']) - assert self.dim > 0 - except: - raise Exception("Dimension '{0}' is not valid.".format(key_to_value['dim'])) - - # This returns the name of the layer. - def Name(): - return self.name - - # This returns the name of the principal output of the layer. For - # the input layer this is the same as the name. For an affine layer - # 'affine1' it might be e.g. 'affine1.relu'. - def OutputName(): - return self.name - - # note: layers have a function InputDim() also, so we call this dimension function - # OutputDim(). - def OutputDim(): - return self.dim - - def str(self): - return 'input name={0} dim={1}'.format(self.name, self.dim) - - def __str__(self): - return self.str() - - - -# A base-class for classes representing lines of xconfig files. -# This handles the -class XconfigLineBase: - def __init__(self): - pass - - def Name(): - return self.name - - def SetDims(): - raise Exception("SetDims() not implemented for this class") - - - - # This class parses and stores a Descriptor-- expression # like Append(Offset(input, -3), input) and so on. @@ -180,6 +114,19 @@ def __init__(self, raise Exception("Error parsing Descriptor '{0}', specific error was: {1}".format( descriptor_string, repr(e))) + # This is like the str() function, but it uses the layer_to_string function + # (which is a function from strings to strings) to convert layer names (or + # in general sub-layer names of the form 'foo.bar') to the component-node + # (or, in general, descriptor) names that appear in the final config file. + # This mechanism gives those designing layer types the freedom to name their + # nodes as they want. + def ConfigString(self, layer_to_string): + if self.operator is None: + assert len(self.items) == 1 and isinstance(self.items[0], str) + return layer_to_node(self.items[0]) + else: + assert isinstance(self.operator, str) + return self.operator + '(' + ', '.join([OutputString(item, layer_to_node) for item in self.items]) + ')' def str(self): if self.operator is None: @@ -192,6 +139,37 @@ def str(self): def __str__(self): return self.str() + # This function returns the dimension (i.e. the feature dimension) of the + # descriptor. It takes 'layer_to_dim' which is a function from + # layer-names (including sub-layer names, like lstm1.memory_cell) to + # dimensions, e.g. you might have layer_to_dim('ivector') = 100, or + # layer_to_dim('affine1') = 1024. + # note: layer_to_dim will raise an exception if a nonexistent layer or + # sub-layer is requested. + def Dim(self, layer_to_dim): + if self.operator is None: + # base-case: self.items = [ layer_name ] (or sub-layer name, like + # 'lstm.memory_cell'). + return layer_to_dim(self.items[0]) + elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]: + # these are all operators for which all args are descriptors + # and must have the same dim. 
+ dim = self.items[0].Dim() + for desc in self.items[1:]: + next_dim = desc.Dim() + if next_dim != dim: + raise Exception("In descriptor {0}, different fields have different " + "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) + return dim + elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]: + # for these operators, only the 1st arg is relevant. + return self.items[0].Dim() + elif self.operator == 'Append': + return sum([ x.Dim() for x in self.items]) + else: + raise Exception("Unknown operator {0}".format(self.operator)) + + # This just checks that seen_item == expected_item, and raises an # exception if not. @@ -494,17 +472,6 @@ def ReadConfigFile(filename): ans.append(layer_object) prev_names.append(layer_object.Name()) -# Uses ParseConfigLine() to turn a config line that has been parsed into -# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, -# and then turns this into an object representing that line of the config file. -# 'prev_names' is a list of the names of preceding lines of the -# config file. -def ConfigLineToObject(config_line, prev_names = None): - (first_token, key_to_value) = ParseConfigLine(config_line) - - if first_token == 'input': - return XconfigInputLine(key_to_value) - def TestLibrary(): TokenizeTest = lambda x: TokenizeDescriptor(x)[:-1] # remove 'end of string' diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py new file mode 100755 index 00000000000..316a4bb4cb3 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import os +import argparse +import shlex +import sys +import warnings +import copy +import imp +import ast + + +sys.path.insert(0, 'steps/nnet3/libs/') +from xconfig_lib import * +from xconfig_layers import * + + +def GetArgs(): + # we add compulsary arguments as named arguments for readability + parser = argparse.ArgumentParser(description="Reads an xconfig file and creates config files " + "for neural net creation and training", + epilog="Search egs/*/*/local/nnet3/*sh for examples") + + parser.add_argument("--self-repair-scale-nonlinearity", type=float, + help="A non-zero value activates the self-repair mechanism in " + "nonlinearities (larger -> faster self-repair)", default=1.0e-05) + parser.add_argument("xconfig_file", + help="Filename of input xconfig file") + parser.add_argument("config_dir", + help="Directory to write config files and variables") + + print(' '.join(sys.argv)) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if not os.path.exists(args.config_dir): + os.makedirs(args.config_dir) + if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: + sys.exit("{0}: invalid option --self-repair-scale-nonlinearity={1}".format( + sys.argv[0], args.self_repair_scale_nonlinearity)) + + return args + +def AddConvMaxpLayer(config_lines, name, input, args): + if '3d-dim' not in input: + raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.") + + input = nodes.AddConvolutionLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.filt_x_dim, args.filt_y_dim, + args.filt_x_step, args.filt_y_step, + args.num_filters, input['vectorization']) + + if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: + 
input = nodes.AddMaxpoolingLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.pool_x_size, args.pool_y_size, args.pool_z_size, + args.pool_x_step, args.pool_y_step, args.pool_z_step) + + return input + +# The ivectors are processed through an affine layer parallel to the CNN layers, +# then concatenated with the CNN output and passed to the deeper part of the network. +def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): + cnn_args = ParseCnnString(cnn_layer) + num_cnn_layers = len(cnn_args) + # We use an Idct layer here to convert MFCC to FBANK features + nnet3_train_lib.WriteIdctMatrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") + prev_layer_output = {'descriptor': "input", + 'dimension': feat_dim} + prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') + + list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] + splice_descriptor = "Append({0})".format(", ".join(list)) + cnn_input_dim = len(splice_indexes) * feat_dim + prev_layer_output = {'descriptor': splice_descriptor, + 'dimension': cnn_input_dim, + '3d-dim': [len(splice_indexes), feat_dim, 1], + 'vectorization': 'yzx'} + + for cl in range(0, num_cnn_layers): + prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) + + if cnn_bottleneck_dim > 0: + prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") + + if ivector_dim > 0: + iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', + 'dimension': ivector_dim} + iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") + prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) + prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] + + return prev_layer_output + +def PrintConfig(file_name, config_lines): + f = open(file_name, 'w') + f.write("\n".join(config_lines['components'])+"\n") + f.write("\n#Component nodes\n") + f.write("\n".join(config_lines['component-nodes'])+"\n") + f.close() + +def ParseCnnString(cnn_param_string_list): + cnn_parser = argparse.ArgumentParser(description="cnn argument parser") + + cnn_parser.add_argument("--filt-x-dim", required=True, type=int) + cnn_parser.add_argument("--filt-y-dim", required=True, type=int) + cnn_parser.add_argument("--filt-x-step", type=int, default = 1) + cnn_parser.add_argument("--filt-y-step", type=int, default = 1) + cnn_parser.add_argument("--num-filters", required=True, type=int) + cnn_parser.add_argument("--pool-x-size", type=int, default = 1) + cnn_parser.add_argument("--pool-y-size", type=int, default = 1) + cnn_parser.add_argument("--pool-z-size", type=int, default = 1) + cnn_parser.add_argument("--pool-x-step", type=int, default = 1) + cnn_parser.add_argument("--pool-y-step", type=int, default = 1) + cnn_parser.add_argument("--pool-z-step", type=int, default = 1) + + cnn_args = [] + for cl in range(0, len(cnn_param_string_list)): + cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl]))) + + return cnn_args + +def ParseSpliceString(splice_indexes): + splice_array = [] + left_context = 0 + right_context = 0 + split1 = splice_indexes.split(); # 
we already checked the string is nonempty. + if len(split1) < 1: + raise Exception("invalid splice-indexes argument, too short: " + + splice_indexes) + try: + for string in split1: + split2 = string.split(",") + if len(split2) < 1: + raise Exception("invalid splice-indexes argument, too-short element: " + + splice_indexes) + int_list = [] + for int_str in split2: + int_list.append(int(int_str)) + if not int_list == sorted(int_list): + raise Exception("elements of splice-indexes must be sorted: " + + splice_indexes) + left_context += -int_list[0] + right_context += int_list[-1] + splice_array.append(int_list) + except ValueError as e: + raise Exception("invalid splice-indexes argument " + splice_indexes + str(e)) + left_context = max(0, left_context) + right_context = max(0, right_context) + + return {'left_context':left_context, + 'right_context':right_context, + 'splice_indexes':splice_array, + 'num_hidden_layers':len(splice_array) + } + +# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. +def MakeConfigs(config_dir, splice_indexes_string, + cnn_layer, cnn_bottleneck_dim, cepstral_lifter, + feat_dim, ivector_dim, num_targets, add_lda, + nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, + nonlin_output_dim_init, nonlin_output_dim_final, + use_presoftmax_prior_scale, + final_layer_normalize_target, + include_log_softmax, + add_final_sigmoid, + xent_regularize, + xent_separate_forward_affine, + self_repair_scale, + objective_type): + + parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) + + left_context = parsed_splice_output['left_context'] + right_context = parsed_splice_output['right_context'] + num_hidden_layers = parsed_splice_output['num_hidden_layers'] + splice_indexes = parsed_splice_output['splice_indexes'] + input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim + + if xent_separate_forward_affine: + if splice_indexes[-1] != [0]: + raise Exception("--xent-separate-forward-affine option is supported only if the last-hidden layer has no splicing before it. Please use a splice-indexes with just 0 as the final splicing config.") + + prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) + + config_lines = {'components':[], 'component-nodes':[]} + + config_files={} + prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], ivector_dim) + + # Add the init config lines for estimating the preconditioning matrices + init_config_lines = copy.deepcopy(config_lines) + init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') + init_config_lines['components'].insert(0, '# preconditioning matrix computation') + nodes.AddOutputLayer(init_config_lines, prev_layer_output) + config_files[config_dir + '/init.config'] = init_config_lines + + if cnn_layer is not None: + prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, + feat_dim, splice_indexes[0], ivector_dim) + + if add_lda: + prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') + + left_context = 0 + right_context = 0 + # we moved the first splice layer to before the LDA.. 
+ # so the input to the first affine layer is going to [0] index + splice_indexes[0] = [0] + + if not nonlin_output_dim is None: + nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers + elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: + raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") + else: + # computes relu-dim for each hidden layer. They increase geometrically across layers + factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 + nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] + assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error + nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified + + for i in range(0, num_hidden_layers): + # make the intermediate config file for layerwise discriminative training + + # prepare the spliced input + if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): + try: + zero_index = splice_indexes[i].index(0) + except ValueError: + zero_index = None + # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor + prev_layer_output_descriptor = prev_layer_output['descriptor'] + subset_output = prev_layer_output + if subset_dim > 0: + # if subset_dim is specified the script expects a zero in the splice indexes + assert(zero_index is not None) + subset_node_config = "dim-range-node name=Tdnn_input_{0} input-node={1} dim-offset={2} dim={3}".format(i, prev_layer_output_descriptor, 0, subset_dim) + subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), + 'dimension' : subset_dim} + config_lines['component-nodes'].append(subset_node_config) + appended_descriptors = [] + appended_dimension = 0 + for j in range(len(splice_indexes[i])): + if j == zero_index: + appended_descriptors.append(prev_layer_output['descriptor']) + appended_dimension += prev_layer_output['dimension'] + continue + appended_descriptors.append('Offset({0}, {1})'.format(subset_output['descriptor'], splice_indexes[i][j])) + appended_dimension += subset_output['dimension'] + prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), + 'dimension' : appended_dimension} + else: + # this is a normal affine node + pass + + if xent_separate_forward_affine and i == num_hidden_layers - 1: + if xent_regularize == 0.0: + raise Exception("xent-separate-forward-affine=True is valid only if xent-regularize is non-zero") + + if nonlin_type == "relu" : + prev_layer_output_chain = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_chain", + prev_layer_output, nonlin_output_dim, + self_repair_scale = self_repair_scale, + norm_target_rms = final_layer_normalize_target) + + prev_layer_output_xent = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_xent", + prev_layer_output, nonlin_output_dim, + self_repair_scale = self_repair_scale, + norm_target_rms = final_layer_normalize_target) + elif nonlin_type == "pnorm" : + prev_layer_output_chain = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_chain", + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target) + + prev_layer_output_xent = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_xent", + prev_layer_output, 
nonlin_input_dim, nonlin_output_dim, + norm_target_rms = final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + + nodes.AddFinalLayer(config_lines, prev_layer_output_chain, num_targets, + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = include_log_softmax) + + nodes.AddFinalLayer(config_lines, prev_layer_output_xent, num_targets, + ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( + 0.5 / xent_regularize), + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = True, + name_affix = 'xent') + else: + if nonlin_type == "relu": + prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_output_dims[i], + self_repair_scale = self_repair_scale, + norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + elif nonlin_type == "pnorm": + prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + # a final layer is added after each new layer as we are generating + # configs for layer-wise discriminative training + + # add_final_sigmoid adds a sigmoid as a final layer as alternative + # to log-softmax layer. + # http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression#Softmax_Regression_vs._k_Binary_Classifiers + # This is useful when you need the final outputs to be probabilities between 0 and 1. + # Usually used with an objective-type such as "quadratic". + # Applications are k-binary classification such Ideal Ratio Mask prediction. 
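# Editor's numeric illustration (made-up values) of the distinction described
# above: for raw outputs [1.0, 2.0], a softmax yields roughly [0.27, 0.73]
# (one distribution over targets, summing to 1), whereas elementwise sigmoids
# yield roughly [0.73, 0.88] (each output an independent probability in [0,1]),
# which is what per-target mask prediction with a quadratic objective needs.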
+ nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = include_log_softmax, + add_final_sigmoid = add_final_sigmoid, + objective_type = objective_type) + if xent_regularize != 0.0: + nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, + ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( + 0.5 / xent_regularize), + use_presoftmax_prior_scale = use_presoftmax_prior_scale, + prior_scale_file = prior_scale_file, + include_log_softmax = True, + name_affix = 'xent') + + config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines + config_lines = {'components':[], 'component-nodes':[]} + + left_context += int(parsed_splice_output['left_context']) + right_context += int(parsed_splice_output['right_context']) + + # write the files used by other scripts like steps/nnet3/get_egs.sh + f = open(config_dir + "/vars", "w") + print('model_left_context=' + str(left_context), file=f) + print('model_right_context=' + str(right_context), file=f) + print('num_hidden_layers=' + str(num_hidden_layers), file=f) + print('num_targets=' + str(num_targets), file=f) + print('add_lda=' + ('true' if add_lda else 'false'), file=f) + print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) + print('objective_type=' + objective_type, file=f) + f.close() + + # printing out the configs + # init.config used to train lda-mllt train + for key in config_files.keys(): + PrintConfig(key, config_files[key]) + + +def BackUpXconfigFile(): + # we write a copy of the xconfig file just to have a record of the original + # input. + try: + xconfig_file_out = open(args.config_dir + "/xconfig") + except: + sys.exit("{0}: error opening file {1}/xconfig for output".format( + sys.argv[0], args.config_dir)) + try: + xconfig_file_in = open(args.xconfig_file) + except: + sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], args.config_dir)) + + print("# This file was copied from {0} by {1}. 
It is the source\n" + "# from which the config files in this directory were generated.\n" + "# Full command line was:\n" + "# {2}".format(args.xconfig_file, sys.argv[0], ' '.join(sys.argv)), + file=xconfig_file_out) + + while True: + line = xconfig_file_in.readline() + if line == '': + break + print(line.strip(), file=xconfig_file_out) + xconfig_file_out.close() + xconfig_file_in.close() + + +def Main(): + args = GetArgs() + + BackUpXconfigFile() + + + try: + f = + shutil.copyfile(args.xconfig_file, args.xconfig_dir + + MakeConfigs(config_dir = args.config_dir, + splice_indexes_string = args.splice_indexes, + feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, + num_targets = args.num_targets, + add_lda = args.add_lda, + cnn_layer = args.cnn_layer, + cnn_bottleneck_dim = args.cnn_bottleneck_dim, + cepstral_lifter = args.cepstral_lifter, + nonlin_type = args.nonlin_type, + nonlin_input_dim = args.nonlin_input_dim, + nonlin_output_dim = args.nonlin_output_dim, + subset_dim = args.subset_dim, + nonlin_output_dim_init = args.nonlin_output_dim_init, + nonlin_output_dim_final = args.nonlin_output_dim_final, + use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, + final_layer_normalize_target = args.final_layer_normalize_target, + include_log_softmax = args.include_log_softmax, + add_final_sigmoid = args.add_final_sigmoid, + xent_regularize = args.xent_regularize, + xent_separate_forward_affine = args.xent_separate_forward_affine, + self_repair_scale = args.self_repair_scale_nonlinearity, + objective_type = args.objective_type) + +if __name__ == "__main__": + Main() From e96df73c9a1f741702ceba3e70a48391ebbb651f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 2 Nov 2016 20:25:28 -0400 Subject: [PATCH 05/12] some partial changes --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 52 +++++++++- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 2 +- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 99 ++++++++++++++++--- 3 files changed, 139 insertions(+), 14 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 97f8c4846b6..e3d50115b03 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -278,6 +278,21 @@ def GetFullConfig(self, all_layers): return ans +# Converts a line as parsed by ParseConfigLine() into a first +# token e.g. 'input-layer' and a key->value map, into +# an objet inherited from XconfigLayerBase. +# 'prev_names' is a list of previous layer names, it's needed +# to parse things like '[-1]' (meaning: the previous layer) +# when they appear in Desriptors. +def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): + if first_token == 'input': + return XconfigInputLayer(first_token, key_to_value, prev_names) + else: + raise Exception("Error parsing xconfig line (no such layer type): " + + first_token + ' ' + + ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) + + # Uses ParseConfigLine() to turn a config line that has been parsed into # a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, # and then turns this into an object representing that line of the config file. @@ -285,9 +300,42 @@ def GetFullConfig(self, all_layers): # config file. 
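# Editor's toy sketch, for illustration only; it is NOT the library's
# ParseConfigLine() (which also strips comments and tolerates spaces inside
# values). It just shows the (first_token, key_to_value) shape that the
# comment above describes; note that the values stay strings.
def parse_config_line_sketch(line):
    fields = line.split()
    first_token = fields[0]
    key_to_value = dict(field.split('=', 1) for field in fields[1:])
    return first_token, key_to_value

assert parse_config_line_sketch('affine-layer name=affine1 dim=1024') == \
       ('affine-layer', {'name': 'affine1', 'dim': '1024'})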
def ConfigLineToObject(config_line, prev_names = None): (first_token, key_to_value) = ParseConfigLine(config_line) + return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names) - if first_token == 'input': - return XconfigInputLayer(first_token, key_to_value) + + +# This function reads an xconfig file and returns it as a list of layers +# (usually we use the variable name 'all_layers' elsewhere for this). +# It will die if the xconfig file is empty or if there was +# some error parsing it. +def ReadXconfigFile(xconfig_filename): + try: + f = open(xconfig_filename, 'r') + except Exception as e: + sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format( + sys.argv[0], xconfig_filename, repr(e))) + prev_names = [] + all_layers = [] + while True: + line = f.readline() + if line == '': + break + x = ParseConfigLine(config_line) + if x is None: + continue # line was blank or only comments. + (first_token, key_to_value) = x + # the next call will raise an easy-to-understand exception if + # it fails. + this_layer = ParsedLineToXconfigLayer(first_token, + key_to_value, + prev_names) + prev_names.append(this_layer.Name()) + all_layers.append(this_layer) + if len(all_layers) == 0: + raise Exception("{0}: xconfig file '{1}' is empty".format( + sys.argv[0], xconfig_filename)) + f.close() + return all_layers def TestLayers(): diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index 17a25f9fbb4..fbdc011296f 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -407,7 +407,7 @@ def TokenizeDescriptor(descriptor_string, # Note: spaces are allowed in the field names but = signs are # disallowed, which is why it's possible to parse them. # This function also removes comments (anything after '#'). -# As a special case, this function will return NULL if the line +# As a special case, this function will return None if the line # is empty after removing spaces. def ParseConfigLine(orig_config_line): # Remove comments. diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 316a4bb4cb3..a4192685622 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -10,7 +10,7 @@ import copy import imp import ast - +from collections import defaultdict sys.path.insert(0, 'steps/nnet3/libs/') from xconfig_lib import * @@ -356,23 +356,23 @@ def MakeConfigs(config_dir, splice_indexes_string, PrintConfig(key, config_files[key]) -def BackUpXconfigFile(): +def BackUpXconfigFile(xconfig_file, config_dir): # we write a copy of the xconfig file just to have a record of the original # input. try: - xconfig_file_out = open(args.config_dir + "/xconfig") + xconfig_file_out = open(config_dir + "/xconfig") except: sys.exit("{0}: error opening file {1}/xconfig for output".format( - sys.argv[0], args.config_dir)) + sys.argv[0], config_dir)) try: - xconfig_file_in = open(args.xconfig_file) + xconfig_file_in = open(xconfig_file) except: - sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], args.config_dir)) + sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], config_dir)) - print("# This file was copied from {0} by {1}. 
It is the source\n" - "# from which the config files in this directory were generated.\n" - "# Full command line was:\n" - "# {2}".format(args.xconfig_file, sys.argv[0], ' '.join(sys.argv)), + print("# This file was created by the command:\n" + "# {0}\n" + "# It is a copy of the source from which the config files in " + "# this directory were generated.\n".format(' '.join(sys.argv)), file=xconfig_file_out) while True: @@ -384,11 +384,88 @@ def BackUpXconfigFile(): xconfig_file_in.close() +def WriteExpandedXconfigFile(config_dir, all_layers): + try: + xconfig_file_out = open(config_dir + "/xconfig.expanded") + except: + sys.exit("{0}: error opening file {1}/xconfig.expanded for output".format( + sys.argv[0], config_dir)) + + print("# This file was created by {0}. It contains the same content as\n" + "# ./xconfig but it was parsed, default config values were set, and\n" + "# it was printed from the internal representation.\n".format(sys.argv[0]), + file=xconfig_file_out) + + for layer in all_layers: + print(str(layer), file=xconfig_file_out) + xconfig_file_out.close() + + +# This function returns a map from config-file basename +# e.g. 'init', 'ref', 'layer1' to a documentation string that goes +# at the top of the file. +def GetConfigHeaders(): + ans = defaultdict(str) # resulting dict will default to the empty string + # for any config files not explicitly listed here. + ans['init'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the input of the network and is used in\n" + "# accumulating stats for an LDA-like transform of the\n" + "# input features.\n"); + ans['ref'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the entire neural network, but with those\n" + "# components that would normally require fixed vectors/matrices\n" + "# read from disk, replaced with random initialization\n" + "# (this applies to the LDA-like transform and the\n" + "# presoftmax-prior-scale, if applicable). This file\n" + "# is used only to work out the left-context and right-context\n" + "# of the network.\n"); + ans['all'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the entire neural network. It might not be used\n" + "# in the current scripts; it's provided for forward compatibility\n" + "# to possible future changes.\n") + + # Note: currently we just copy all lines that were going to go to 'all', into + # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py + ans['layer1'] = ("# This file was created by the command:\n" + "# " + ' '.join(sys.argv) + "\n" + "# It contains the configuration of the entire neural network.\n" + "# The contents are the same\n" + "# as 'all.config'. The reason this file is named this way (and\n" + "# that the config file `num_hidden_layers` contains 1, even though\n" + "# this file may really contain more than 1 hidden layer), is\n" + "# historical... we used to create networks by adding hidden layers\n" + "# one by one (discriminative pretraining), but more recently we\n" + "# have found that it's better to add them all at once. This file\n" + "# exists to enable the older training scripts to work. Note:\n" + "# it contains the inputs of the neural network even though it doesn't\n" + "# have to (since they are included in 'init.config'). This will\n" + "# give us the flexibility to change the scripts in future.\n"); + return ans; + + + + +# This is where most of the work of this program happens. 
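# Editor's hypothetical sketch of how this function might be completed (the
# draft body below is unfinished; the real implementation may differ). It only
# assumes that each layer's GetFullConfig() returns (config_basename, line)
# pairs and that GetConfigHeaders() maps basenames to header strings, as
# described elsewhere in this patch. The function name is made up.
from collections import defaultdict

def WriteConfigFilesSketch(config_dir, all_layers):
    config_basename_to_lines = defaultdict(list)
    for layer in all_layers:
        for basename, line in layer.GetFullConfig():
            config_basename_to_lines[basename].append(line)
    config_basename_to_header = GetConfigHeaders()
    for basename, lines in config_basename_to_lines.items():
        filename = '{0}/{1}.config'.format(config_dir, basename)
        with open(filename, 'w') as f:
            f.write(config_basename_to_header[basename])
            f.write('\n'.join(lines) + '\n')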
+def WriteConfigFiles(config_dir, all_layers): + config_basename_to_lines = defaultdict(list)2 + + config_basename_to_header = GetConfigHeaders() + + + + + def Main(): args = GetArgs() - BackUpXconfigFile() + BackUpXconfigFile(args.xconfig_file, args.config_dir) + + all_layers = ReadXconfigFile(args.xconfig_file) + WriteExpandedXconfigFile(args.config_dir all_layers) try: f = From 0886454975bf8113abadd75c5166f85f9f42d4ed Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 17:40:36 -0400 Subject: [PATCH 06/12] some minor reorganization --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 270 +++++---- egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py | 129 ++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 528 +++++------------- 3 files changed, 383 insertions(+), 544 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index e3d50115b03..c8511600b6c 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -9,90 +9,6 @@ import argparse from xconfig_lib import * -# Given a list of objects of type XconfigLayerBase ('all_layers'), -# including at least the layers preceding 'current_layer' (and maybe -# more layers), return the names of layers preceding 'current_layer' -# This will be used in parsing expressions like [-1] in descriptors -# (which is an alias for the previous layer). -def GetPrevNames(all_layers, current_layer): - assert current_layer in all_layers - prev_names = [] - for layer in all_layers: - if layer is current_layer: - break - prev_names.append(layer.Name()) - return prev_names - -# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like -# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects -# inheriting from XconfigLayerBase. 'current_layer' is provided so that the -# function can make sure not to look in layers that appear *after* this layer -# (because that's not allowed). -def GetDimFromLayerName(all_layers, current_layer, full_layer_name): - assert isinstance(full_layer_name, str) - split_name = full_layer_name.split('.') - if len(split_name) == 0: - raise Exception("Bad layer name: " + full_layer_name) - layer_name = split_name[0] - if len(split_name) == 1: - qualifier = None - else: - # we probably expect len(split_name) == 2 in this case, - # but no harm in allowing dots in the qualifier. - qualifier = '.'.join(split_name[1:]) - - for layer in all_layers: - if layer is current_layer: - break - if layer.Name() == layer_name: - if not qualifier in layer.Qualifiers(): - raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( - layer_name, qualifier)) - return layer.OutputDim(qualifier) - # No such layer was found. - if layer_name in [ layer.Name() for layer in all_layers ]: - raise Exception("Layer '{0}' was requested before it appeared in " - "the xconfig file (circular dependencies or out-of-order " - "layers".format(layer_name)) - else: - raise Exception("No such layer: '{0}'".format(layer_name)) - - -# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like -# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple -# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects -# inheriting from XconfigLayerBase. 'current_layer' is provided so that the -# function can make sure not to look in layers that appear *after* this layer -# (because that's not allowed). 
-def GetStringFromLayerName(all_layers, current_layer, full_layer_name): - assert isinstance(full_layer_name, str) - split_name = full_layer_name.split('.') - if len(split_name) == 0: - raise Exception("Bad layer name: " + full_layer_name) - layer_name = split_name[0] - if len(split_name) == 1: - qualifier = None - else: - # we probably expect len(split_name) == 2 in this case, - # but no harm in allowing dots in the qualifier. - qualifier = '.'.join(split_name[1:]) - - for layer in all_layers: - if layer is current_layer: - break - if layer.Name() == layer_name: - if not qualifier in layer.Qualifiers(): - raise Exception("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( - layer_name, qualifier)) - return layer.OutputName(qualifier) - # No such layer was found. - if layer_name in [ layer.Name() for layer in all_layers ]: - raise Exception("Layer '{0}' was requested before it appeared in " - "the xconfig file (circular dependencies or out-of-order " - "layers".format(layer_name)) - else: - raise Exception("No such layer: '{0}'".format(layer_name)) - # A base-class for classes representing layers of xconfig files. @@ -105,40 +21,73 @@ class XconfigLayerBase(object): # The only required and 'special' values that are dealt with directly at this level, are # 'name' and 'input'. # The rest are put in self.config and are dealt with by the child classes' init functions. - # prev_names is an array of the names (xxx in 'name=xxx') of previous - # lines of the config file. + # all_layers is an array of objects inheriting XconfigLayerBase for all previously + # parsed layers. - def __init__(self, first_token, key_to_value, prev_names = None): + def __init__(self, first_token, key_to_value, all_layers): self.layer_type = first_token if not 'name' in key_to_value: - raise Exception("Expected 'name' to be specified.") + raise RuntimeError("Expected 'name' to be specified.") self.name = key_to_value['name'] if not IsValidLineName(self.name): - raise Exception("Invalid value: name={0}".format(key_to_value['name'])) + raise RuntimeError("Invalid value: name={0}".format(key_to_value['name'])) # the following, which should be overridden in the child class, sets # default config parameters in self.config. self.SetDefaultConfigs() # The following is not to be reimplemented in child classes; - # sets the config files to those specified by the user. - self._SetConfigs(key_to_value) + # it sets the config values to those specified by the user, and + # parses any Descriptors. + self.SetConfigs(key_to_value, all_layers) # the following, which should be overridden in the child class, checks # that the config parameters that have been set are reasonable. self.CheckConfigs() # We broke this code out of __init__ for clarity. - def _SetConfigs(self, key_to_value): + def SetConfigs(self, key_to_value, all_layers): # the child-class constructor will deal with the configuration values # in a more specific way. for key,value in key_to_value.items(): - if key != 'name' and key != 'input': + if key != 'name': if not key in self.config: - raise Exception("Configuration value {0}={1} was not expected in " + raise RuntimeError("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) self.config[key] = ConvertValueToType(key, type(self.config[key]), value) + self.descriptors = dict() + self.descriptor_dims = dict() + # Parse Descriptors and get their dims and their 'final' string form. 
+ # Put them as 4-tuples (descriptor, string, normalized-string, final-string) + # in self.descriptors[key] + for key in self.GetDescriptorConfigs(): + if not key in self.config: + raise RuntimeError("{0}: object of type {1} needs to override " + "GetDescriptorConfigs()".format(sys.argv[0], + str(type(self)))) + descriptor_string = self.config[key] # input string. + assert isinstance(descriptor_string, str) + desc = self.ConvertToDescriptor(descriptor_string, all_layers) + desc_dim = self.GetDimForDescriptor(desc, all_layers) + desc_norm_str = desc.str() + # desc_output_str contains the "final" component names, those that + # appear in the actual config file (i.e. not names like + # 'layer.qualifier'); that's how it differs from desc_norm_str. + # Note: it's possible that the two strings might be the same in + # many, even most, cases-- it depends whether OutputName(self, qualifier) + # returns self.Name() + '.' + qualifier when qualifier is not None. + # That's up to the designer of the layer type. + desc_output_str = self.GetStringForDescriptor(desc, all_layers) + self.descriptors[key] = (desc, desc_dim, desc_norm_str, desc_output_str) + # the following helps to check the code by parsing it again. + desc2 = self.ConvertToDescriptor(desc_norm_str, all_layers) + desc_norm_str2 = desc2.str() + # if the following ever fails we'll have to do some debugging. + if desc_norm_str != desc_norm_str2: + raise RuntimeError("Likely code error: '{0}' != '{1}'".format( + desc_norm_str, desc_norm_str2)) + # This function converts 'this' to a string which could be printed to an # xconfig file; in xconfig_to_configs.py we actually expand all the lines to # strings and write it as xconfig.expanded as a reference (so users can @@ -152,6 +101,17 @@ def str(self): def __str__(self): return self.str() + + # This function converts any config variables in self.config which + # correspond to Descriptors, into a 'normalized form' derived from parsing + # them as Descriptors, replacing things like [-1] with the actual layer + # names, and regenerating them as strings. We stored this when the + # object was initialized, in self.descriptors; this function just copies them + # back to the config. + def NormalizeDescriptors(self): + for key,tuple in self.descriptors.items(): + self.config[key] = tuple[2] # desc_norm_str + # This function, which is a convenience function intended to be called from # child classes, converts a string representing a descriptor # ('descriptor_string') into an object of type Descriptor, and returns it. @@ -162,15 +122,16 @@ def ConvertToDescriptor(self, descriptor_string, all_layers): prev_names = GetPrevNames(all_layers, self) tokens = TokenizeDescriptor(descriptor_string, prev_names) pos = 0 - (self.input, pos) = ParseNewDescriptor(tokens, pos, prev_names) + (descriptor, pos) = ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: - raise Exception("Parsing Descriptor, saw junk at end: " + + raise RuntimeError("Parsing Descriptor, saw junk at end: " + ' '.join(tokens[pos:-1])) + return descriptor # Returns the dimension of a Descriptor object. - # This is a convenience function provided for use in child classes; + # This is a convenience function used in SetConfigs. 
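    # Editor's illustration (hypothetical dims) of the Descriptor dimension
    # rules that Descriptor.Dim() in xconfig_lib.py implements, and that the
    # helper below relies on: with layer_to_dim('input') == 40 and
    # layer_to_dim('ivector') == 100,
    #   Offset(input, -1)                       -> 40   (dim of the 1st arg)
    #   Sum(input, input)                       -> 40   (all args must agree)
    #   Append(Offset(input, -1), input,
    #          ReplaceIndex(ivector, t, 0))     -> 40 + 40 + 100 = 180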
def GetDimForDescriptor(self, descriptor, all_layers): layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) return descriptor.Dim(layer_to_dim_func) @@ -184,24 +145,37 @@ def GetStringForDescriptor(self, descriptor, all_layers): # Name() returns the name of this layer, e.g. 'affine1'. It does not # necessarily correspond to a component name. - def Name(): + def Name(self): return self.name - ###### Functions that should be overridden by the child class: ##### + ###### Functions that might be overridden by the child class: ##### # child classes should override this. - def SetDefaultConfigs(): - raise Exception("Child classes must override SetDefaultConfigs().") + def SetDefaultConfigs(self): + raise RuntimeError("Child classes must override SetDefaultConfigs().") # child classes should override this. - def CheckConfigs(): + def CheckConfigs(self): pass + # This function, which may be (but usually will not have to be) overrideden + # by child classes, returns a list of keys/names of config variables that + # will be interpreted as Descriptors. It is used in the function + # 'NormalizeDescriptors()'. This implementation will work + # layer types whose only Descriptor-valued config is 'input'. + + # If a child class adds more config variables that are interpreted as + # descriptors (e.g. to read auxiliary inputs), or does not have an input + # (e.g. the XconfigInputLayer), it should override this function's + # implementation to something like: `return ['input', 'input2']` + def GetDescriptorConfigs(self): + return [ 'input' ] + # Returns a list of all qualifiers (meaning auxiliary outputs) that this # layer supports. These are either 'None' for the regular output, or a # string (e.g. 'projection' or 'memory_cell') for any auxiliary outputs that # the layer might provide. Most layer types will not need to override this. - def Qualifiers(): + def Qualifiers(self): return [ None ] # Called with qualifier == None, this returns the component-node name of the @@ -213,13 +187,13 @@ def Qualifiers(): # a highway LSTM you need the memory-cell of a layer, so you might allow # qualifier='memory_cell' for such a layer type, and it would return the # component node or a suitable Descriptor: something like 'lstm3.c_t' - def OutputName(qualifier = None): - raise Exception("Child classes must override OutputName()") + def OutputName(self, qualifier = None): + raise RuntimeError("Child classes must override OutputName()") # The dimension that this layer outputs. The 'qualifier' parameter is for # layer types which support auxiliary outputs. - def OutputDim(qualifier = None): - raise Exception("Child classes must override OutputDim()") + def OutputDim(self, qualifier = None): + raise RuntimeError("Child classes must override OutputDim()") # This function returns lines destined for the 'full' config format, as # would be read by the C++ programs. @@ -229,11 +203,8 @@ def OutputDim(qualifier = None): # [ ('init', 'input-node name=input dim=40'), # ('ref', 'input-node name=input dim=40') ] # which would be written to config_dir/init.config and config_dir/ref.config. - # - # 'all_layers' is a vector of objects inheriting from XconfigLayerBase, - # which is used to get the component names and dimensions at the input. 
- def GetFullConfig(self, all_layers): - raise Exception("Child classes must override GetFullConfig()") + def GetFullConfig(self): + raise RuntimeError("Child classes must override GetFullConfig()") # This class is for lines like @@ -252,17 +223,20 @@ def SetDefaultConfigs(self): def CheckConfigs(self): if self.config['dim'] <= 0: - raise Exception("Dimension of input-layer '{0}' is not set".format(self.name)) + raise RuntimeError("Dimension of input-layer '{0}' is not set".format(self.name)) + + def GetDescriptorConfigs(self): + return [] # there is no 'input' field in self.config. - def OutputName(qualifier = None): + def OutputName(self, qualifier = None): assert qualifier is None return self.name - def OutputDim(qualifier = None): + def OutputDim(self, qualifier = None): assert qualifier is None return self.config['dim'] - def GetFullConfig(self, all_layers): + def GetFullConfig(self): # the input layers need to be printed in 'init.config' (which # initializes the neural network prior to the LDA), in 'ref.config', # which is a version of the config file used for getting left and right @@ -271,13 +245,63 @@ def GetFullConfig(self, all_layers): # In 'full.config' we write everything, this is just for reference, # and also for cases where we don't use the LDA-like transform. ans = [] - for config_name in [ 'init', 'ref', 'full' ]: + for config_name in [ 'init', 'ref', 'all' ]: ans.append( (config_name, 'input-node name={0} dim={1}'.format(self.name, self.config['dim']))) return ans + +# This class is for lines like +# 'output name=output input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))' +# This is for outputs that are not really output "layers" (there is no affine transform or +# nonlinearity), they just directly map to an output-node in nnet3. +class XconfigTrivialOutputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'output' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]' } + + def CheckConfigs(self): + pass # nothing to check; descriptor-parsing can't happen in this function. + + def OutputName(self, qualifier = None): + assert qualifier is None + return self.name + + def OutputDim(self, qualifier = None): + assert qualifier is None + # note: each value of self.descriptors is (descriptor, dim, normalized-string, output-string). + return self.descriptors['input'][1] + + def GetFullConfig(self): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'output-string' we mean a string that can appear in + # config-files, i.e. 
it contains the 'final' names of + descriptor_output_str = self.descriptors['input'][3] + + for config_name in [ 'ref', 'all' ]: + ans.append( (config_name, + 'output-node name={0} input={1}'.format( + self.name, descriptor_output_str))) + return ans + + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. @@ -287,8 +311,10 @@ def GetFullConfig(self, all_layers): def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): if first_token == 'input': return XconfigInputLayer(first_token, key_to_value, prev_names) + elif first_token == 'output': + return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) else: - raise Exception("Error parsing xconfig line (no such layer type): " + + raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) @@ -314,13 +340,12 @@ def ReadXconfigFile(xconfig_filename): except Exception as e: sys.exit("{0}: error reading xconfig file '{1}'; error was {2}".format( sys.argv[0], xconfig_filename, repr(e))) - prev_names = [] all_layers = [] while True: line = f.readline() if line == '': break - x = ParseConfigLine(config_line) + x = ParseConfigLine(line) if x is None: continue # line was blank or only comments. (first_token, key_to_value) = x @@ -328,11 +353,10 @@ def ReadXconfigFile(xconfig_filename): # it fails. this_layer = ParsedLineToXconfigLayer(first_token, key_to_value, - prev_names) - prev_names.append(this_layer.Name()) + all_layers) all_layers.append(this_layer) if len(all_layers) == 0: - raise Exception("{0}: xconfig file '{1}' is empty".format( + raise RuntimeError("{0}: xconfig file '{1}' is empty".format( sys.argv[0], xconfig_filename)) f.close() return all_layers diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py index fbdc011296f..782e6ebd3e1 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py @@ -1,3 +1,10 @@ +# Copyright 2016 Johns Hopkins University (Author: Daniel Povey). +# License: Apache 2.0. + +# This library contains various utilities that are involved in processing +# of xconfig -> config conversion. It contains "generic" lower-level code +# while xconfig_layers.py contains the code specific to layer types. + from __future__ import print_function import subprocess import logging @@ -8,28 +15,92 @@ import time import argparse +# [utility function used in xconfig_layers.py] +# Given a list of objects of type XconfigLayerBase ('all_layers'), +# including at least the layers preceding 'current_layer' (and maybe +# more layers), return the names of layers preceding 'current_layer' +# This will be used in parsing expressions like [-1] in descriptors +# (which is an alias for the previous layer). +def GetPrevNames(all_layers, current_layer): + prev_names = [] + for layer in all_layers: + if layer is current_layer: + break + prev_names.append(layer.Name()) + return prev_names + +# [utility function used in xconfig_layers.py] +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a dimension. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). 
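# Editor's standalone illustration of the layer-name / qualifier split
# convention implemented just below ('lstm2.memory_cell' is a made-up name):
full_layer_name = 'lstm2.memory_cell'
split_name = full_layer_name.split('.')
layer_name = split_name[0]
qualifier = '.'.join(split_name[1:]) if len(split_name) > 1 else None
assert (layer_name, qualifier) == ('lstm2', 'memory_cell')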
+def GetDimFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise RuntimeError("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputDim(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise RuntimeError("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise RuntimeError("No such layer: '{0}'".format(layer_name)) + + +# [utility function used in xconfig_layers.py] +# this converts a layer-name like 'ivector' or 'input', or a sub-layer name like +# 'lstm2.memory_cell', into a descriptor (usually, but not required to be a simple +# component-node name) that can appear in the generated config file. 'all_layers' is a vector of objects +# inheriting from XconfigLayerBase. 'current_layer' is provided so that the +# function can make sure not to look in layers that appear *after* this layer +# (because that's not allowed). +def GetStringFromLayerName(all_layers, current_layer, full_layer_name): + assert isinstance(full_layer_name, str) + split_name = full_layer_name.split('.') + if len(split_name) == 0: + raise RuntimeError("Bad layer name: " + full_layer_name) + layer_name = split_name[0] + if len(split_name) == 1: + qualifier = None + else: + # we probably expect len(split_name) == 2 in this case, + # but no harm in allowing dots in the qualifier. + qualifier = '.'.join(split_name[1:]) + + for layer in all_layers: + if layer is current_layer: + break + if layer.Name() == layer_name: + if not qualifier in layer.Qualifiers(): + raise RuntimeError("Layer '{0}' has no such qualifier: '{1}' ({0}.{1})".format( + layer_name, qualifier)) + return layer.OutputName(qualifier) + # No such layer was found. + if layer_name in [ layer.Name() for layer in all_layers ]: + raise RuntimeError("Layer '{0}' was requested before it appeared in " + "the xconfig file (circular dependencies or out-of-order " + "layers".format(layer_name)) + else: + raise RuntimeError("No such layer: '{0}'".format(layer_name)) -class StrToBoolAction(argparse.Action): - """ A custom action to convert bools from shell format i.e., true/false - to python format i.e., True/False """ - def __call__(self, parser, namespace, values, option_string=None): - if values == "true": - setattr(namespace, self.dest, True) - elif values == "false": - setattr(namespace, self.dest, False) - else: - raise Exception("Unknown value {0} for --{1}".format(values, self.dest)) - -class NullstrToNoneAction(argparse.Action): - """ A custom action to convert empty strings passed by shell - to None in python. This is necessary as shell scripts print null strings - when a variable is not specified. We could use the more apt None - in python. 
""" - def __call__(self, parser, namespace, values, option_string=None): - if values.strip() == "": - setattr(namespace, self.dest, None) - else: - setattr(namespace, self.dest, values) # This function, used in converting string values in config lines to # configuration values in self.config in layers, attempts to @@ -57,7 +128,7 @@ def ConvertValueToType(key, dest_type, string_value): raise Exception("Invalid configuration value {0}={1} (expected int)".format( key, string_value)) elif dest_type == type(str()): - return sting_value + return string_value @@ -123,10 +194,12 @@ def __init__(self, def ConfigString(self, layer_to_string): if self.operator is None: assert len(self.items) == 1 and isinstance(self.items[0], str) - return layer_to_node(self.items[0]) + return layer_to_string(self.items[0]) else: assert isinstance(self.operator, str) - return self.operator + '(' + ', '.join([OutputString(item, layer_to_node) for item in self.items]) + ')' + return self.operator + '(' + ', '.join( + [ item.ConfigString(layer_to_string) if isinstance(item, Descriptor) else str(item) + for item in self.items]) + ')' def str(self): if self.operator is None: @@ -154,18 +227,18 @@ def Dim(self, layer_to_dim): elif self.operator in [ 'Sum', 'Failover', 'IfDefined', 'Switch' ]: # these are all operators for which all args are descriptors # and must have the same dim. - dim = self.items[0].Dim() + dim = self.items[0].Dim(layer_to_dim) for desc in self.items[1:]: - next_dim = desc.Dim() + next_dim = desc.Dim(layer_to_dim) if next_dim != dim: raise Exception("In descriptor {0}, different fields have different " "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) return dim elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]: # for these operators, only the 1st arg is relevant. - return self.items[0].Dim() + return self.items[0].Dim(layer_to_dim) elif self.operator == 'Append': - return sum([ x.Dim() for x in self.items]) + return sum([ x.Dim(layer_to_dim) for x in self.items]) else: raise Exception("Unknown operator {0}".format(self.operator)) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index a4192685622..2684e062e8e 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -13,23 +13,26 @@ from collections import defaultdict sys.path.insert(0, 'steps/nnet3/libs/') +# the following is in case we weren't running this from the normal directory. 
+sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/') + from xconfig_lib import * from xconfig_layers import * def GetArgs(): # we add compulsary arguments as named arguments for readability - parser = argparse.ArgumentParser(description="Reads an xconfig file and creates config files " - "for neural net creation and training", - epilog="Search egs/*/*/local/nnet3/*sh for examples") - - parser.add_argument("--self-repair-scale-nonlinearity", type=float, - help="A non-zero value activates the self-repair mechanism in " - "nonlinearities (larger -> faster self-repair)", default=1.0e-05) - parser.add_argument("xconfig_file", - help="Filename of input xconfig file") - parser.add_argument("config_dir", - help="Directory to write config files and variables") + parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files ' + 'for neural net creation and training', + epilog='Search egs/*/*/local/nnet3/*sh for examples') + + parser.add_argument('--self-repair-scale-nonlinearity', type=float, + help='A non-zero value activates the self-repair mechanism in ' + 'nonlinearities (larger -> faster self-repair)', default=1.0e-05) + parser.add_argument('xconfig_file', + help='Filename of input xconfig file') + parser.add_argument('config_dir', + help='Directory to write config files and variables') print(' '.join(sys.argv)) @@ -42,332 +45,37 @@ def CheckArgs(args): if not os.path.exists(args.config_dir): os.makedirs(args.config_dir) if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: - sys.exit("{0}: invalid option --self-repair-scale-nonlinearity={1}".format( + sys.exit('{0}: invalid option --self-repair-scale-nonlinearity={1}'.format( sys.argv[0], args.self_repair_scale_nonlinearity)) return args -def AddConvMaxpLayer(config_lines, name, input, args): - if '3d-dim' not in input: - raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.") - - input = nodes.AddConvolutionLayer(config_lines, name, input, - input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], - args.filt_x_dim, args.filt_y_dim, - args.filt_x_step, args.filt_y_step, - args.num_filters, input['vectorization']) - - if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: - input = nodes.AddMaxpoolingLayer(config_lines, name, input, - input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], - args.pool_x_size, args.pool_y_size, args.pool_z_size, - args.pool_x_step, args.pool_y_step, args.pool_z_step) - - return input - -# The ivectors are processed through an affine layer parallel to the CNN layers, -# then concatenated with the CNN output and passed to the deeper part of the network. 
-def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): - cnn_args = ParseCnnString(cnn_layer) - num_cnn_layers = len(cnn_args) - # We use an Idct layer here to convert MFCC to FBANK features - nnet3_train_lib.WriteIdctMatrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") - prev_layer_output = {'descriptor': "input", - 'dimension': feat_dim} - prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') - - list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] - splice_descriptor = "Append({0})".format(", ".join(list)) - cnn_input_dim = len(splice_indexes) * feat_dim - prev_layer_output = {'descriptor': splice_descriptor, - 'dimension': cnn_input_dim, - '3d-dim': [len(splice_indexes), feat_dim, 1], - 'vectorization': 'yzx'} - - for cl in range(0, num_cnn_layers): - prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) - - if cnn_bottleneck_dim > 0: - prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") - - if ivector_dim > 0: - iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', - 'dimension': ivector_dim} - iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") - prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) - prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] - - return prev_layer_output - -def PrintConfig(file_name, config_lines): - f = open(file_name, 'w') - f.write("\n".join(config_lines['components'])+"\n") - f.write("\n#Component nodes\n") - f.write("\n".join(config_lines['component-nodes'])+"\n") - f.close() - -def ParseCnnString(cnn_param_string_list): - cnn_parser = argparse.ArgumentParser(description="cnn argument parser") - - cnn_parser.add_argument("--filt-x-dim", required=True, type=int) - cnn_parser.add_argument("--filt-y-dim", required=True, type=int) - cnn_parser.add_argument("--filt-x-step", type=int, default = 1) - cnn_parser.add_argument("--filt-y-step", type=int, default = 1) - cnn_parser.add_argument("--num-filters", required=True, type=int) - cnn_parser.add_argument("--pool-x-size", type=int, default = 1) - cnn_parser.add_argument("--pool-y-size", type=int, default = 1) - cnn_parser.add_argument("--pool-z-size", type=int, default = 1) - cnn_parser.add_argument("--pool-x-step", type=int, default = 1) - cnn_parser.add_argument("--pool-y-step", type=int, default = 1) - cnn_parser.add_argument("--pool-z-step", type=int, default = 1) - - cnn_args = [] - for cl in range(0, len(cnn_param_string_list)): - cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl]))) - - return cnn_args - -def ParseSpliceString(splice_indexes): - splice_array = [] - left_context = 0 - right_context = 0 - split1 = splice_indexes.split(); # we already checked the string is nonempty. 
- if len(split1) < 1: - raise Exception("invalid splice-indexes argument, too short: " - + splice_indexes) - try: - for string in split1: - split2 = string.split(",") - if len(split2) < 1: - raise Exception("invalid splice-indexes argument, too-short element: " - + splice_indexes) - int_list = [] - for int_str in split2: - int_list.append(int(int_str)) - if not int_list == sorted(int_list): - raise Exception("elements of splice-indexes must be sorted: " - + splice_indexes) - left_context += -int_list[0] - right_context += int_list[-1] - splice_array.append(int_list) - except ValueError as e: - raise Exception("invalid splice-indexes argument " + splice_indexes + str(e)) - left_context = max(0, left_context) - right_context = max(0, right_context) - - return {'left_context':left_context, - 'right_context':right_context, - 'splice_indexes':splice_array, - 'num_hidden_layers':len(splice_array) - } - -# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. -def MakeConfigs(config_dir, splice_indexes_string, - cnn_layer, cnn_bottleneck_dim, cepstral_lifter, - feat_dim, ivector_dim, num_targets, add_lda, - nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, - nonlin_output_dim_init, nonlin_output_dim_final, - use_presoftmax_prior_scale, - final_layer_normalize_target, - include_log_softmax, - add_final_sigmoid, - xent_regularize, - xent_separate_forward_affine, - self_repair_scale, - objective_type): - - parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) - - left_context = parsed_splice_output['left_context'] - right_context = parsed_splice_output['right_context'] - num_hidden_layers = parsed_splice_output['num_hidden_layers'] - splice_indexes = parsed_splice_output['splice_indexes'] - input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim - - if xent_separate_forward_affine: - if splice_indexes[-1] != [0]: - raise Exception("--xent-separate-forward-affine option is supported only if the last-hidden layer has no splicing before it. Please use a splice-indexes with just 0 as the final splicing config.") - - prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) - - config_lines = {'components':[], 'component-nodes':[]} - - config_files={} - prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], ivector_dim) - - # Add the init config lines for estimating the preconditioning matrices - init_config_lines = copy.deepcopy(config_lines) - init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') - init_config_lines['components'].insert(0, '# preconditioning matrix computation') - nodes.AddOutputLayer(init_config_lines, prev_layer_output) - config_files[config_dir + '/init.config'] = init_config_lines - - if cnn_layer is not None: - prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, - feat_dim, splice_indexes[0], ivector_dim) - - if add_lda: - prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') - - left_context = 0 - right_context = 0 - # we moved the first splice layer to before the LDA.. 
- # so the input to the first affine layer is going to [0] index - splice_indexes[0] = [0] - - if not nonlin_output_dim is None: - nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers - elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: - raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") - else: - # computes relu-dim for each hidden layer. They increase geometrically across layers - factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 - nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] - assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error - nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified - - for i in range(0, num_hidden_layers): - # make the intermediate config file for layerwise discriminative training - - # prepare the spliced input - if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): - try: - zero_index = splice_indexes[i].index(0) - except ValueError: - zero_index = None - # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor - prev_layer_output_descriptor = prev_layer_output['descriptor'] - subset_output = prev_layer_output - if subset_dim > 0: - # if subset_dim is specified the script expects a zero in the splice indexes - assert(zero_index is not None) - subset_node_config = "dim-range-node name=Tdnn_input_{0} input-node={1} dim-offset={2} dim={3}".format(i, prev_layer_output_descriptor, 0, subset_dim) - subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), - 'dimension' : subset_dim} - config_lines['component-nodes'].append(subset_node_config) - appended_descriptors = [] - appended_dimension = 0 - for j in range(len(splice_indexes[i])): - if j == zero_index: - appended_descriptors.append(prev_layer_output['descriptor']) - appended_dimension += prev_layer_output['dimension'] - continue - appended_descriptors.append('Offset({0}, {1})'.format(subset_output['descriptor'], splice_indexes[i][j])) - appended_dimension += subset_output['dimension'] - prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), - 'dimension' : appended_dimension} - else: - # this is a normal affine node - pass - - if xent_separate_forward_affine and i == num_hidden_layers - 1: - if xent_regularize == 0.0: - raise Exception("xent-separate-forward-affine=True is valid only if xent-regularize is non-zero") - - if nonlin_type == "relu" : - prev_layer_output_chain = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_chain", - prev_layer_output, nonlin_output_dim, - self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) - - prev_layer_output_xent = nodes.AddAffRelNormLayer(config_lines, "Tdnn_pre_final_xent", - prev_layer_output, nonlin_output_dim, - self_repair_scale = self_repair_scale, - norm_target_rms = final_layer_normalize_target) - elif nonlin_type == "pnorm" : - prev_layer_output_chain = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_chain", - prev_layer_output, nonlin_input_dim, nonlin_output_dim, - norm_target_rms = final_layer_normalize_target) - - prev_layer_output_xent = nodes.AddAffPnormLayer(config_lines, "Tdnn_pre_final_xent", - prev_layer_output, 
nonlin_input_dim, nonlin_output_dim, - norm_target_rms = final_layer_normalize_target) - else: - raise Exception("Unknown nonlinearity type") - - nodes.AddFinalLayer(config_lines, prev_layer_output_chain, num_targets, - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = include_log_softmax) - - nodes.AddFinalLayer(config_lines, prev_layer_output_xent, num_targets, - ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( - 0.5 / xent_regularize), - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = True, - name_affix = 'xent') - else: - if nonlin_type == "relu": - prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), - prev_layer_output, nonlin_output_dims[i], - self_repair_scale = self_repair_scale, - norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) - elif nonlin_type == "pnorm": - prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), - prev_layer_output, nonlin_input_dim, nonlin_output_dim, - norm_target_rms = 1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) - else: - raise Exception("Unknown nonlinearity type") - # a final layer is added after each new layer as we are generating - # configs for layer-wise discriminative training - - # add_final_sigmoid adds a sigmoid as a final layer as alternative - # to log-softmax layer. - # http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression#Softmax_Regression_vs._k_Binary_Classifiers - # This is useful when you need the final outputs to be probabilities between 0 and 1. - # Usually used with an objective-type such as "quadratic". - # Applications are k-binary classification such Ideal Ratio Mask prediction. 
- nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = include_log_softmax, - add_final_sigmoid = add_final_sigmoid, - objective_type = objective_type) - if xent_regularize != 0.0: - nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, - ng_affine_options = " param-stddev=0 bias-stddev=0 learning-rate-factor={0} ".format( - 0.5 / xent_regularize), - use_presoftmax_prior_scale = use_presoftmax_prior_scale, - prior_scale_file = prior_scale_file, - include_log_softmax = True, - name_affix = 'xent') - - config_files['{0}/layer{1}.config'.format(config_dir, i+1)] = config_lines - config_lines = {'components':[], 'component-nodes':[]} - - left_context += int(parsed_splice_output['left_context']) - right_context += int(parsed_splice_output['right_context']) - - # write the files used by other scripts like steps/nnet3/get_egs.sh - f = open(config_dir + "/vars", "w") - print('model_left_context=' + str(left_context), file=f) - print('model_right_context=' + str(right_context), file=f) - print('num_hidden_layers=' + str(num_hidden_layers), file=f) - print('num_targets=' + str(num_targets), file=f) - print('add_lda=' + ('true' if add_lda else 'false'), file=f) - print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) - print('objective_type=' + objective_type, file=f) - f.close() - - # printing out the configs - # init.config used to train lda-mllt train - for key in config_files.keys(): - PrintConfig(key, config_files[key]) + +# # write the files used by other scripts like steps/nnet3/get_egs.sh +# f = open(config_dir + 'vars', 'w') +# print('model_left_context=' + str(left_context), file=f) +# print('model_right_context=' + str(right_context), file=f) +# print('num_hidden_layers=' + str(num_hidden_layers), file=f) +# print('num_targets=' + str(num_targets), file=f) +# print('add_lda=' + ('true' if add_lda else 'false'), file=f) +# print('include_log_softmax=' + ('true' if include_log_softmax else 'false'), file=f) +# print('objective_type=' + objective_type, file=f) +# f.close() + def BackUpXconfigFile(xconfig_file, config_dir): # we write a copy of the xconfig file just to have a record of the original # input. try: - xconfig_file_out = open(config_dir + "/xconfig") + xconfig_file_out = open(config_dir + '/xconfig', 'w') except: - sys.exit("{0}: error opening file {1}/xconfig for output".format( + sys.exit('{0}: error opening file {1}/xconfig for output'.format( sys.argv[0], config_dir)) try: xconfig_file_in = open(xconfig_file) except: - sys.exit("{0}: error opening file {1} for input".format(sys.argv[0], config_dir)) + sys.exit('{0}: error opening file {1} for input'.format(sys.argv[0], config_dir)) print("# This file was created by the command:\n" "# {0}\n" @@ -384,65 +92,90 @@ def BackUpXconfigFile(xconfig_file, config_dir): xconfig_file_in.close() -def WriteExpandedXconfigFile(config_dir, all_layers): +# This functions writes config_dir/xconfig.expanded.1 and +# config_dir/xconfig.expanded.2, showing some of the internal stages of +# processing the xconfig file before turning it into config files. 
+def WriteExpandedXconfigFiles(config_dir, all_layers): try: - xconfig_file_out = open(config_dir + "/xconfig.expanded") + xconfig_file_out = open(config_dir + '/xconfig.expanded.1', 'w') except: - sys.exit("{0}: error opening file {1}/xconfig.expanded for output".format( + sys.exit('{0}: error opening file {1}/xconfig.expanded.1 for output'.format( sys.argv[0], config_dir)) - print("# This file was created by {0}. It contains the same content as\n" - "# ./xconfig but it was parsed, default config values were set, and\n" - "# it was printed from the internal representation.\n".format(sys.argv[0]), + + print('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '#It contains the same content as ./xconfig but it was parsed and\n' + '#default config values were set.\n' + '# See also ./xconfig.expanded.2\n', file=xconfig_file_out) + + for layer in all_layers: + print(str(layer), file=xconfig_file_out) + xconfig_file_out.close() + + try: + xconfig_file_out = open(config_dir + '/xconfig.expanded.2', 'w') + except: + sys.exit('{0}: error opening file {1}/xconfig.expanded.2 for output'.format( + sys.argv[0], config_dir)) + + print('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the same content as ./xconfig but it was parsed,\n' + '# default config values were set, and Descriptors (input=xxx) were normalized.\n' + '# See also ./xconfig.expanded.1\n\n', file=xconfig_file_out) for layer in all_layers: + layer.NormalizeDescriptors() print(str(layer), file=xconfig_file_out) xconfig_file_out.close() + + # This function returns a map from config-file basename # e.g. 'init', 'ref', 'layer1' to a documentation string that goes # at the top of the file. def GetConfigHeaders(): ans = defaultdict(str) # resulting dict will default to the empty string # for any config files not explicitly listed here. - ans['init'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the input of the network and is used in\n" - "# accumulating stats for an LDA-like transform of the\n" - "# input features.\n"); - ans['ref'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the entire neural network, but with those\n" - "# components that would normally require fixed vectors/matrices\n" - "# read from disk, replaced with random initialization\n" - "# (this applies to the LDA-like transform and the\n" - "# presoftmax-prior-scale, if applicable). This file\n" - "# is used only to work out the left-context and right-context\n" - "# of the network.\n"); - ans['all'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the entire neural network. It might not be used\n" - "# in the current scripts; it's provided for forward compatibility\n" - "# to possible future changes.\n") + ans['init'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the input of the network and is used in\n' + '# accumulating stats for an LDA-like transform of the\n' + '# input features.\n'); + ans['ref'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the entire neural network, but with those\n' + '# components that would normally require fixed vectors/matrices\n' + '# read from disk, replaced with random initialization\n' + '# (this applies to the LDA-like transform and the\n' + '# presoftmax-prior-scale, if applicable). 
This file\n' + '# is used only to work out the left-context and right-context\n' + '# of the network.\n'); + ans['all'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the entire neural network. It might not be used\n' + '# in the current scripts; it\'s provided for forward compatibility\n' + '# to possible future changes.\n') # Note: currently we just copy all lines that were going to go to 'all', into # 'layer1', to avoid propagating this nastiness to the code in xconfig_layers.py - ans['layer1'] = ("# This file was created by the command:\n" - "# " + ' '.join(sys.argv) + "\n" - "# It contains the configuration of the entire neural network.\n" - "# The contents are the same\n" - "# as 'all.config'. The reason this file is named this way (and\n" - "# that the config file `num_hidden_layers` contains 1, even though\n" - "# this file may really contain more than 1 hidden layer), is\n" - "# historical... we used to create networks by adding hidden layers\n" - "# one by one (discriminative pretraining), but more recently we\n" - "# have found that it's better to add them all at once. This file\n" - "# exists to enable the older training scripts to work. Note:\n" - "# it contains the inputs of the neural network even though it doesn't\n" - "# have to (since they are included in 'init.config'). This will\n" - "# give us the flexibility to change the scripts in future.\n"); + ans['layer1'] = ('# This file was created by the command:\n' + '# ' + ' '.join(sys.argv) + '\n' + '# It contains the configuration of the entire neural network.\n' + '# The contents are the same\n' + '# as \'all.config\'. The reason this file is named this way (and\n' + '# that the config file `num_hidden_layers` contains 1, even though\n' + '# this file may really contain more than 1 hidden layer), is\n' + '# historical... we used to create networks by adding hidden layers\n' + '# one by one (discriminative pretraining), but more recently we\n' + '# have found that it\'s better to add them all at once. This file\n' + '# exists to enable the older training scripts to work. Note:\n' + '# it contains the inputs of the neural network even though it doesn\'t\n' + '# have to (since they are included in \'init.config\'). This will\n' + '# give us the flexibility to change the scripts in future.\n'); return ans; @@ -450,49 +183,58 @@ def GetConfigHeaders(): # This is where most of the work of this program happens. def WriteConfigFiles(config_dir, all_layers): - config_basename_to_lines = defaultdict(list)2 + # config_basename_to_lines is map from the basename of the + # config, as a string (i.e. 'ref', 'all', 'init') to a list of + # strings representing lines to put in the config file. + config_basename_to_lines = defaultdict(list) config_basename_to_header = GetConfigHeaders() + for layer in all_layers: + try: + pairs = layer.GetFullConfig() + for config_basename, line in pairs: + config_basename_to_lines[config_basename].append(line) + except Exception as e: + sys.exit('{0}: error producing config lines from xconfig ' + 'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer), + repr(e))) + + # currently we don't expect any of the GetFullConfig functions to output to + # config-basename 'layer1'... currently we just copy this from + # config-basename 'all', for back-compatibility to older scripts. 
+ assert not 'layer1' in config_basename_to_lines + config_basename_to_lines['layer1'] = config_basename_to_lines['all'] + + for basename,lines in config_basename_to_lines.items(): + header = config_basename_to_header[basename] + filename = '{0}/{1}.config'.format(config_dir, basename) + try: + f = open(filename, 'w') + print(header, file=f) + for line in lines: + print(line, file=f) + f.close() + except Exception as e: + sys.exit('{0}: error writing to config file {1}: error is {2}'.format( + sys.argv[0], filename, repr(e))) + def Main(): args = GetArgs() - BackUpXconfigFile(args.xconfig_file, args.config_dir) - all_layers = ReadXconfigFile(args.xconfig_file) + WriteExpandedXconfigFiles(args.config_dir, all_layers) + WriteConfigFiles(args.config_dir, all_layers) - WriteExpandedXconfigFile(args.config_dir all_layers) - try: - f = - shutil.copyfile(args.xconfig_file, args.xconfig_dir - - MakeConfigs(config_dir = args.config_dir, - splice_indexes_string = args.splice_indexes, - feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, - num_targets = args.num_targets, - add_lda = args.add_lda, - cnn_layer = args.cnn_layer, - cnn_bottleneck_dim = args.cnn_bottleneck_dim, - cepstral_lifter = args.cepstral_lifter, - nonlin_type = args.nonlin_type, - nonlin_input_dim = args.nonlin_input_dim, - nonlin_output_dim = args.nonlin_output_dim, - subset_dim = args.subset_dim, - nonlin_output_dim_init = args.nonlin_output_dim_init, - nonlin_output_dim_final = args.nonlin_output_dim_final, - use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, - final_layer_normalize_target = args.final_layer_normalize_target, - include_log_softmax = args.include_log_softmax, - add_final_sigmoid = args.add_final_sigmoid, - xent_regularize = args.xent_regularize, - xent_separate_forward_affine = args.xent_separate_forward_affine, - self_repair_scale = args.self_repair_scale_nonlinearity, - objective_type = args.objective_type) - -if __name__ == "__main__": + +if __name__ == '__main__': Main() + + +# test: +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo From a2d120a5c3103a07640cc5071b1a5e9192f6deba Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 17:53:55 -0400 Subject: [PATCH 07/12] Import modules only --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 21 +++++++++---------- .../libs/{xconfig_lib.py => xconfig_utils.py} | 0 egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 6 +++--- 3 files changed, 13 insertions(+), 14 deletions(-) rename egs/wsj/s5/steps/nnet3/libs/{xconfig_lib.py => xconfig_utils.py} (100%) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index c8511600b6c..7af70d96ae8 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -7,8 +7,7 @@ import traceback import time import argparse -from xconfig_lib import * - +import xconfig_utils # A base-class for classes representing layers of xconfig files. 
@@ -29,7 +28,7 @@ def __init__(self, first_token, key_to_value, all_layers): if not 'name' in key_to_value: raise RuntimeError("Expected 'name' to be specified.") self.name = key_to_value['name'] - if not IsValidLineName(self.name): + if not xconfig_utils.IsValidLineName(self.name): raise RuntimeError("Invalid value: name={0}".format(key_to_value['name'])) # the following, which should be overridden in the child class, sets @@ -53,7 +52,7 @@ def SetConfigs(self, key_to_value, all_layers): if not key in self.config: raise RuntimeError("Configuration value {0}={1} was not expected in " "layer of type {2}".format(key, value, self.layer_type)) - self.config[key] = ConvertValueToType(key, type(self.config[key]), value) + self.config[key] = xconfig_utils.ConvertValueToType(key, type(self.config[key]), value) self.descriptors = dict() @@ -119,10 +118,10 @@ def NormalizeDescriptors(self): # of type XconfigLayerBase) so that it can work out a list of the names of # other layers, and get dimensions from them. def ConvertToDescriptor(self, descriptor_string, all_layers): - prev_names = GetPrevNames(all_layers, self) - tokens = TokenizeDescriptor(descriptor_string, prev_names) + prev_names = xconfig_utils.GetPrevNames(all_layers, self) + tokens = xconfig_utils.TokenizeDescriptor(descriptor_string, prev_names) pos = 0 - (descriptor, pos) = ParseNewDescriptor(tokens, pos, prev_names) + (descriptor, pos) = xconfig_utils.ParseNewDescriptor(tokens, pos, prev_names) # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: @@ -133,14 +132,14 @@ def ConvertToDescriptor(self, descriptor_string, all_layers): # Returns the dimension of a Descriptor object. # This is a convenience function used in SetConfigs. def GetDimForDescriptor(self, descriptor, all_layers): - layer_to_dim_func = lambda name: GetDimFromLayerName(all_layers, self, name) + layer_to_dim_func = lambda name: xconfig_utils.GetDimFromLayerName(all_layers, self, name) return descriptor.Dim(layer_to_dim_func) # Returns the 'final' string form of a Descriptor object, as could be used # in config files. # This is a convenience function provided for use in child classes; def GetStringForDescriptor(self, descriptor, all_layers): - layer_to_string_func = lambda name: GetStringFromLayerName(all_layers, self, name) + layer_to_string_func = lambda name: xconfig_utils.GetStringFromLayerName(all_layers, self, name) return descriptor.ConfigString(layer_to_string_func) # Name() returns the name of this layer, e.g. 'affine1'. It does not @@ -325,7 +324,7 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): # 'prev_names' is a list of the names of preceding lines of the # config file. def ConfigLineToObject(config_line, prev_names = None): - (first_token, key_to_value) = ParseConfigLine(config_line) + (first_token, key_to_value) = xconfig_utils.ParseConfigLine(config_line) return ParsedLineToXconfigLayer(first_token, key_to_value, prev_names) @@ -345,7 +344,7 @@ def ReadXconfigFile(xconfig_filename): line = f.readline() if line == '': break - x = ParseConfigLine(line) + x = xconfig_utils.ParseConfigLine(line) if x is None: continue # line was blank or only comments. 
(first_token, key_to_value) = x diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py similarity index 100% rename from egs/wsj/s5/steps/nnet3/libs/xconfig_lib.py rename to egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 2684e062e8e..56404a0e17d 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -16,8 +16,8 @@ # the following is in case we weren't running this from the normal directory. sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/libs/') -from xconfig_lib import * -from xconfig_layers import * +import xconfig_utils +import xconfig_layers def GetArgs(): @@ -226,7 +226,7 @@ def WriteConfigFiles(config_dir, all_layers): def Main(): args = GetArgs() BackUpXconfigFile(args.xconfig_file, args.config_dir) - all_layers = ReadXconfigFile(args.xconfig_file) + all_layers = xconfig_layers.ReadXconfigFile(args.xconfig_file) WriteExpandedXconfigFiles(args.config_dir, all_layers) WriteConfigFiles(args.config_dir, all_layers) From 0e294a4726805789f6c92fad6ce7613214f8660f Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 3 Nov 2016 23:37:11 -0400 Subject: [PATCH 08/12] Add output layer with affine component --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 133 +++++++++++++++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 11 +- 2 files changed, 137 insertions(+), 7 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 7af70d96ae8..5a7301696c8 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -291,13 +291,138 @@ def GetFullConfig(self): # note: each value of self.descriptors is (descriptor, dim, # normalized-string, output-string). # by 'output-string' we mean a string that can appear in - # config-files, i.e. it contains the 'final' names of - descriptor_output_str = self.descriptors['input'][3] + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_str = self.descriptors['input'][3] for config_name in [ 'ref', 'all' ]: ans.append( (config_name, 'output-node name={0} input={1}'.format( - self.name, descriptor_output_str))) + self.name, descriptor_final_str))) + return ans + + +# This class is for lines like +# 'output-layer name=output dim=4257 input=Append(input@-1, input@0, input@1, ReplaceIndex(ivector, t, 0))' +# By default this includes a log-softmax component. The parameters are initialized to zero, as +# this is best for output layers. +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer, will normally equal the number of pdfs.] +# include-log-softmax=true [setting it to false will omit the log-softmax component- useful for chain +# models.] +# objective-type=linear [the only other choice currently is 'quadratic', for use in regression +# problems] + +# learning-rate-factor=1.0 [Learning rate factor for the final affine component, multiplies the +# standard learning rate. normally you'll leave this as-is, but for +# xent regularization output layers for chain models you'll want to set +# learning-rate-factor=(0.5/xent_regularize), normally +# learning-rate-factor=5.0 since xent_regularize is normally 0.1. 
+# presoftmax-scale-file='' [If set, a filename for a vector that will be used to scale the output +# of the affine component before the log-softmax (if +# include-log-softmax=true), or before the output (if not). This is +# helpful to avoid instability in training due to some classes having +# much more data than others. The way we normally create this vector +# is to take the priors of the classes to the power -0.25 and rescale +# them so the average is 1.0. This factor -0.25 is referred to +# as presoftmax_prior_scale_power in scripts.] +# In the scripts this would normally be set to config_dir/presoftmax_prior_scale.vec +class XconfigOutputLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'output-layer' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]', 'dim':-1, 'include-log-softmax':True, + 'objective-type':'linear', 'learning-rate-factor':1.0, + 'include-log-softmax':True, 'presoftmax-scale-file':'' } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise RuntimeError("In output-layer, dim has invalid value {0}".format(self.config['dim'])) + if self.config['objective-type'] != 'linear' and self.config['objective_type'] != 'quadratic': + raise RuntimeError("In output-layer, objective-type has invalid value {0}".format( + self.config['objective-type'])) + if self.config['learning-rate-factor'] <= 0.0: + raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format( + self.config['learning-rate-factor'])) + + pass # nothing to check; descriptor-parsing can't happen in this function. + + + # you cannot access the output of this layer from other layers... see + # comment in OutputName for the reason why. + def Qualifiers(self): + return [] + + def OutputName(self, qualifier = None): + # Note: nodes of type output-node in nnet3 may not be accessed in Descriptors, + # so calling this with qualifier=None doesn't make sense. But it might make + # sense to make the output of the softmax layer and/or the output of the + # affine layer available as inputs to other layers, in some circumstances. + # we'll implement that when it's needed. + raise RuntimeError("Outputs of output-layer may not be used by other layers") + + def OutputDim(self, qualifier = None): + # see comment in OutputName(). + raise RuntimeError("Outputs of output-layer may not be used by other layers") + + def GetFullConfig(self): + # the input layers need to be printed in 'init.config' (which + # initializes the neural network prior to the LDA), in 'ref.config', + # which is a version of the config file used for getting left and right + # context (it doesn't read anything for the LDA-like transform and/or + # presoftmax-prior-scale components) + # In 'full.config' we write everything, this is just for reference, + # and also for cases where we don't use the LDA-like transform. + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. 
+ descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + objective_type = self.config['objective-type'] + learning_rate_factor = self.config['learning-rate-factor'] + include_log_softmax = self.config['include-log-softmax'] + presoftmax_scale_file = self.config['presoftmax-scale-file'] + + for config_name in [ 'ref', 'all' ]: + # First the affine node. + line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' + 'output-dim={2} param-stddev=0 bias-stddev=0 '.format( + self.name, input_dim, output_dim) + + ('learning-rate-factor={0} '.format(learning_rate_factor) + if learning_rate_factor != 1.0 else '')) + ans.append((config_name, line)) + line = ('component-node name={0}.affine component={0}.affine input={1}'.format( + self.name, descriptor_final_string)) + ans.append((config_name, line)) + cur_node = '{0}.affine'.format(descriptor_final_string) + if presoftmax_scale_file != '' and config_name == 'all': + # don't use the presoftmax-scale in 'ref.config' since that file won't exist at the + # time we evaluate it. (ref.config is used to find the left/right context). + line = ('component name={0}.fixed-scale type=FixedScaleComponent scales={1}'.format( + self.name, presoftmax_scale_file)) + ans.append((config_name, line)) + line = ('component-node name={0}.fixed-scale component={0}.fixed-scale input={1}'.format( + self.name, cur_node)) + ans.append((config_name, line)) + cur_node = '{0}.fixed-scale'.format(self.name) + if include_log_softmax: + line = ('component name={0}.log-softmax type=LogSoftmaxComponent dim={1}'.format( + self.name, output_dim)) + ans.append((config_name, line)) + line = ('component-node name={0}.log-softmax component={0}.log-softmax input={1}'.format( + self.name, cur_node)) + ans.append((config_name, line)) + cur_node = '{0}.log-softmax'.format(self.name) + line = ('output-node name={0} input={0}.log-softmax'.format(self.name, cur_node)) + ans.append((config_name, line)) return ans @@ -312,6 +437,8 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigInputLayer(first_token, key_to_value, prev_names) elif first_token == 'output': return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) + elif first_token == 'output-layer': + return XconfigOutputLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 56404a0e17d..50ad3d4d800 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -196,9 +196,10 @@ def WriteConfigFiles(config_dir, all_layers): for config_basename, line in pairs: config_basename_to_lines[config_basename].append(line) except Exception as e: - sys.exit('{0}: error producing config lines from xconfig ' + print('{0}: error producing config lines from xconfig ' 'line \'{1}\': error was: {2}'.format(sys.argv[0], str(layer), - repr(e))) + repr(e)), file=sys.stderr) + raise(e) # currently we don't expect any of the GetFullConfig functions to output to # config-basename 'layer1'... 
currently we just copy this from @@ -216,8 +217,9 @@ def WriteConfigFiles(config_dir, all_layers): print(line, file=f) f.close() except Exception as e: - sys.exit('{0}: error writing to config file {1}: error is {2}'.format( - sys.argv[0], filename, repr(e))) + print('{0}: error writing to config file {1}: error is {2}'.format( + sys.argv[0], filename, repr(e)), file=sys.stderr) + raise e @@ -238,3 +240,4 @@ def Main(): # test: # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo From 7a05e2265a3e7b897769ca686bb0f903e751dcc9 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 01:09:09 -0400 Subject: [PATCH 09/12] Add support for common layer types such as relu+renorm. --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 124 ++++++++++++++++-- egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py | 6 + egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 2 + 3 files changed, 122 insertions(+), 10 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index 5a7301696c8..e71b36cd63e 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -349,8 +349,6 @@ def CheckConfigs(self): raise RuntimeError("In output-layer, learning-rate-factor has invalid value {0}".format( self.config['learning-rate-factor'])) - pass # nothing to check; descriptor-parsing can't happen in this function. - # you cannot access the output of this layer from other layers... see # comment in OutputName for the reason why. @@ -370,13 +368,6 @@ def OutputDim(self, qualifier = None): raise RuntimeError("Outputs of output-layer may not be used by other layers") def GetFullConfig(self): - # the input layers need to be printed in 'init.config' (which - # initializes the neural network prior to the LDA), in 'ref.config', - # which is a version of the config file used for getting left and right - # context (it doesn't read anything for the LDA-like transform and/or - # presoftmax-prior-scale components) - # In 'full.config' we write everything, this is just for reference, - # and also for cases where we don't use the LDA-like transform. ans = [] # note: each value of self.descriptors is (descriptor, dim, @@ -391,6 +382,9 @@ def GetFullConfig(self): include_log_softmax = self.config['include-log-softmax'] presoftmax_scale_file = self.config['presoftmax-scale-file'] + + # note: ref.config is used only for getting the left-context and right-context + # of the network; all.config is where we put the actual network definition. for config_name in [ 'ref', 'all' ]: # First the affine node. line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' @@ -402,7 +396,7 @@ def GetFullConfig(self): line = ('component-node name={0}.affine component={0}.affine input={1}'.format( self.name, descriptor_final_string)) ans.append((config_name, line)) - cur_node = '{0}.affine'.format(descriptor_final_string) + cur_node = '{0}.affine'.format(self.name) if presoftmax_scale_file != '' and config_name == 'all': # don't use the presoftmax-scale in 'ref.config' since that file won't exist at the # time we evaluate it. (ref.config is used to find the left/right context). 
@@ -426,6 +420,114 @@ def GetFullConfig(self): return ans +# This class is for lines like +# 'relu-renorm-layer name=layer1 dim=1024 input=Append(-3,0,3)' +# or: +# 'sigmoid-layer name=layer1 dim=1024 input=Append(-3,0,3)' +# Here, the name of the layer itself dictates the sequence of nonlinearities +# that are applied; the name should contain some combination of 'relu', 'renorm', +# 'sigmoid' and 'tanh', and these nonlinearities will be added after the +# affine component. +# +# The dimension specified is the output dim; the input dim is worked out from the input descriptor. +# This class supports only nonlinearity types that do not change the dimension; we can create +# another layer type to enable the use p-norm and similar dimension-reducing nonlinearities. +# +# See other configuration values below. +# +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer, e.g. 1024] +# self-repair-scale=1.0e-05 [Affects relu, sigmoid and tanh layers.] +# +# Configuration values that we might one day want to add here, but which we +# don't yet have, include target-rms (affects 'renorm' component). +class XconfigSimpleLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + # Here we just list some likely combinations.. you can just add any + # combinations you want to use, to this list. + assert first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', + 'tanh-layer' ] + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]', 'dim':-1, 'self-repair-scale':1.0e-05 } + + def CheckConfigs(self): + if self.config['dim'] <= 0: + raise RuntimeError("In {0}, dim has invalid value {1}".format(self.layer_type, + self.config['dim'])) + if self.config['self-repair-scale'] < 0.0 or self.config['self-repair-scale'] > 1.0: + raise RuntimeError("In {0}, objective-type has invalid value {0}".format( + self.layer_type, self.config['self-repair-scale'])) + + def OutputName(self, qualifier = None): + assert qualifier == None + + split_layer_name = self.layer_type.split('-') + assert split_layer_name[-1] == 'layer' + last_nonlinearity = split_layer_name[-2] + # return something like: layer3.renorm + return '{0}.{1}'.format(self.name, last_nonlinearity) + + def OutputDim(self, qualifier = None): + return self.config['dim'] + + def GetFullConfig(self): + + ans = [] + + split_layer_name = self.layer_type.split('-') + assert split_layer_name[-1] == 'layer' + nonlinearities = split_layer_name[:-1] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + self_repair_scale = self.config['self-repair-scale'] + + for config_name in [ 'ref', 'all' ]: + # First the affine node. 
+ line = ('component name={0}.affine type=NaturalGradientAffineComponent input-dim={1} ' + 'output-dim={2} '.format(self.name, input_dim, output_dim)) + ans.append((config_name, line)) + line = ('component-node name={0}.affine component={0}.affine input={1}'.format( + self.name, descriptor_final_string)) + ans.append((config_name, line)) + cur_node = '{0}.affine'.format(self.name) + + for nonlinearity in nonlinearities: + if nonlinearity == 'relu': + line = ('component name={0}.{1} type=RectifiedLinearComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'sigmoid': + line = ('component name={0}.{1} type=SigmoidComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'tanh': + line = ('component name={0}.{1} type=TanhComponent dim={2} ' + 'self-repair-scale={3}'.format(self.name, nonlinearity, output_dim, + self_repair_scale)) + elif nonlinearity == 'renorm': + line = ('component name={0}.{1} type=NormalizeComponent dim={2} '.format( + self.name, nonlinearity, output_dim)) + else: + raise RuntimeError("Unknown nonlinearity type: {0}".format(nonlinearity)) + ans.append((config_name, line)) + line = 'component-node name={0}.{1} component={0}.{1} input={2}'.format( + self.name, nonlinearity, cur_node) + ans.append((config_name, line)) + cur_node = '{0}.{1}'.format(self.name, nonlinearity) + return ans + + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. @@ -439,6 +541,8 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigTrivialOutputLayer(first_token, key_to_value, prev_names) elif first_token == 'output-layer': return XconfigOutputLayer(first_token, key_to_value, prev_names) + elif first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', 'tanh-layer' ]: + return XconfigSimpleLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py index 782e6ebd3e1..5744ec4fc46 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_utils.py @@ -27,6 +27,12 @@ def GetPrevNames(all_layers, current_layer): if layer is current_layer: break prev_names.append(layer.Name()) + prev_names_set = set() + for name in prev_names: + if name in prev_names_set: + raise RuntimeError("{0}: Layer name {1} is used more than once.".format( + sys.argv[0], name)) + prev_names_set.add(name) return prev_names # [utility function used in xconfig_layers.py] diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 50ad3d4d800..90e13cb46e2 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -241,3 +241,5 @@ def Main(): # test: # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') 
>xconfig; ./xconfig_to_configs.py xconfig foo From b5c6175ec3f1bae605d1b7e294cedc48c920783c Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 01:29:01 -0400 Subject: [PATCH 10/12] Remove unused config. --- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 90e13cb46e2..5a7bc767c8a 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -25,10 +25,6 @@ def GetArgs(): parser = argparse.ArgumentParser(description='Reads an xconfig file and creates config files ' 'for neural net creation and training', epilog='Search egs/*/*/local/nnet3/*sh for examples') - - parser.add_argument('--self-repair-scale-nonlinearity', type=float, - help='A non-zero value activates the self-repair mechanism in ' - 'nonlinearities (larger -> faster self-repair)', default=1.0e-05) parser.add_argument('xconfig_file', help='Filename of input xconfig file') parser.add_argument('config_dir', @@ -44,10 +40,6 @@ def GetArgs(): def CheckArgs(args): if not os.path.exists(args.config_dir): os.makedirs(args.config_dir) - if args.self_repair_scale_nonlinearity < 0.0 or args.self_repair_scale_nonlinearity > 0.1: - sys.exit('{0}: invalid option --self-repair-scale-nonlinearity={1}'.format( - sys.argv[0], args.self_repair_scale_nonlinearity)) - return args From 6592d945ad1ebc740b395774e36a9b0e71f5c989 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 02:01:50 -0400 Subject: [PATCH 11/12] Add support for fixed-affine-layer --- egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py | 82 ++++++++++++++++++- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 4 + 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py index e71b36cd63e..2990e290152 100644 --- a/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py +++ b/egs/wsj/s5/steps/nnet3/libs/xconfig_layers.py @@ -528,6 +528,84 @@ def GetFullConfig(self): return ans +# This class is for lines like +# 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat' +# +# The output dimension of the layer may be specified via 'dim=xxx', but if not specified, +# the dimension defaults to the same as the input. Note: we don't attempt to read that +# file at the time the config is created, because in the recipes, that file is created +# after the config files. +# +# See other configuration values below. +# +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# dim=-1 [Output dimension of layer; defaults to the same as the input dim.] +# affine-transform-file='' [Must be specified.] +# +# Configuration values that we might one day want to add here, but which we +# don't yet have, include target-rms (affects 'renorm' component). +class XconfigFixedAffineLayer(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + assert first_token == 'fixed-affine-layer' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def SetDefaultConfigs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. 
+ self.config = { 'input':'[-1]', 'dim':-1, 'affine-transform-file':'' } + + def CheckConfigs(self): + if self.config['affine-transform-file'] == '': + raise RuntimeError("In fixed-affine-layer, affine-transform-file must be set.") + + def OutputName(self, qualifier = None): + assert qualifier == None + return self.name + + def OutputDim(self, qualifier = None): + output_dim = self.config['dim'] + # If not set, the output-dim defaults to the input-dim. + if output_dim <= 0: + output_dim = self.descriptors['input'][1] + return output_dim + + def GetFullConfig(self): + ans = [] + + # note: each value of self.descriptors is (descriptor, dim, + # normalized-string, output-string). + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + descriptor_final_string = self.descriptors['input'][3] + input_dim = self.descriptors['input'][1] + output_dim = self.config['dim'] + transform_file = self.config['affine-transform-file'] + if output_dim <= 0: + output_dim = input_dim + + + # to init.config we write an output-node with the name 'output' and + # with a Descriptor equal to the descriptor that's the input to this + # layer. This will be used to accumulate stats to learn the LDA transform. + line = 'output-node name=output input={0}'.format(descriptor_final_string) + ans.append(('init', line)) + + # write the 'real' component to all.config + line = 'component name={0} type=FixedAffineComponent matrix={1}'.format( + self.name, transform_file) + ans.append(('all', line)) + # write a random version of the component, with the same dims, to ref.config + line = 'component name={0} type=FixedAffineComponent input-dim={1} output-dim={2}'.format( + self.name, input_dim, output_dim) + ans.append(('ref', line)) + # the component-node gets written to all.config and ref.config. + line = 'component-node name={0} component={0} input={1}'.format( + self.name, descriptor_final_string) + ans.append(('all', line)) + ans.append(('ref', line)) + return ans + # Converts a line as parsed by ParseConfigLine() into a first # token e.g. 'input-layer' and a key->value map, into # an objet inherited from XconfigLayerBase. 
@@ -543,10 +621,12 @@ def ParsedLineToXconfigLayer(first_token, key_to_value, prev_names): return XconfigOutputLayer(first_token, key_to_value, prev_names) elif first_token in [ 'relu-layer', 'relu-renorm-layer', 'sigmoid-layer', 'tanh-layer' ]: return XconfigSimpleLayer(first_token, key_to_value, prev_names) + elif first_token == 'fixed-affine-layer': + return XconfigFixedAffineLayer(first_token, key_to_value, prev_names) else: raise RuntimeError("Error parsing xconfig line (no such layer type): " + first_token + ' ' + - ' '.join(['{0} {1}'.format(x,y) for x,y in key_to_value.items()])) + ' '.join(['{0}={1}'.format(x,y) for x,y in key_to_value.items()])) # Uses ParseConfigLine() to turn a config line that has been parsed into diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 5a7bc767c8a..60a2ba384b8 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -235,3 +235,7 @@ def Main(): # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + +# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'input dim=100 name=ivector'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo + + From e99cea915d1d5020011b7f702cf19c65b800b8ac Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 4 Nov 2016 02:23:29 -0400 Subject: [PATCH 12/12] Small fix to example command. --- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 60a2ba384b8..bd841aae1f2 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -236,6 +236,6 @@ def Main(): # mkdir -p foo; (echo 'input dim=40 name=input'; echo 'relu-renorm-layer name=affine1 dim=1024'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo -# mkdir -p foo; (echo 'input dim=40 name=input'; echo 'input dim=100 name=ivector'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo +# mkdir -p foo; (echo 'input dim=100 name=ivector'; echo 'input dim=40 name=input'; echo 'fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=foo/bar/lda.mat'; echo 'output-layer name=output dim=1924 input=Append(-1,0,1)') >xconfig; ./xconfig_to_configs.py xconfig foo
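
# For illustration: a minimal sketch of driving the xconfig library directly, mirroring what
# Main() in xconfig_to_configs.py does after argument parsing. It uses only functions shown
# above (ReadXconfigFile, NormalizeDescriptors, GetFullConfig); the 'xconfig' filename matches
# the test commands above, and the library path is an assumption about where this is run from.
#
#   from __future__ import print_function
#   import sys
#   sys.path.insert(0, 'steps/nnet3/libs/')
#   import xconfig_layers
#
#   all_layers = xconfig_layers.ReadXconfigFile('xconfig')
#   for layer in all_layers:
#       layer.NormalizeDescriptors()     # resolve '[-1]', bare offsets etc. into full Descriptors
#   for layer in all_layers:
#       for config_basename, line in layer.GetFullConfig():
#           print(config_basename, line)  # prints pairs like: all component name=output.affine ...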