Commit: Add comments.

Sopel97 committed Dec 8, 2020
1 parent a924cb0 commit 5593dc4
Showing 5 changed files with 123 additions and 0 deletions.
52 changes: 52 additions & 0 deletions feature_block.py
@@ -4,6 +4,49 @@ def _get_main_factor_name(full_name):
return full_name.replace('^', '')

class FeatureBlock:
'''
This is the base class for all the network input features.
All features must inherit from this class.
It abstracts a named set of features in a way that
allows seamless introduction of factorizers.
For example a set of HalfKP features is a subclass of this class, and
so is the HalfKP^ feature set (where "^" denotes that it's factorized).
A feature block needs 3 fundamental things for construction:
- name - any string, but please use ASCII. Also we'd like "^" to be reserved to
denote that the block is a factorized version of some other block.
- hash - a 32-bit unsigned integer, as defined in the original nodchip trainer
- factors - the ordered list of named "feature subblocks". If there's more than one
it's assumed that the feature block is factorized.
More about factors, because they are the fundamental building block.
A block can have just one factor, like HalfKP, but sometimes it's possible to
factorize some features further. Ideally we don't want to have multiple
features talking about the same thing when the net is actually used for play,
because it's wasteful, but it's helpful during training because it makes it
easier to generalize over similar positions. This is for example utilized by HalfKP^,
which defines 3 factors: HalfKP, HalfK, and P.
Factors are passed to the constructor as an OrderedDict from string to the number of dimensions.
The first factor is the "real" factor (or "main" factor), one that is supposed to be used for play.
The following factors (if any) are the "virtual" factors, and are only used for training.
Based on these factors and their dimensions FeatureBlock defines 3 values:
- num_real_features - the number of unfactorized features that the resulting net will use in play
- num_virtual_features - the number of additional features used for learning
that will be coalesced when converting to .nnue
- num_features - the total number of features defined by the factors.
It should equal num_real_features + num_virtual_features.
FeatureBlock provides default method implementations that abstract away the
factorized/unfactorized nature of the feature block. These methods are described in
their own docstrings.
The only method that a subclass of FeatureBlock must define is
get_active_features (def get_active_features(self, board: chess.Board)),
which takes the board and returns the list of indices of the features
that are active for this board.
'''

def __init__(self, name, hash, factors):
if not isinstance(factors, OrderedDict):
raise Exception('Factors must be a collections.OrderedDict')
@@ -18,9 +61,18 @@ def __init__(self, name, hash, factors):
def get_main_factor_name(self):
return _get_main_factor_name(self.name)

'''
This method represents the default factorizer. If your feature block
has multiple factors, you need to override this method to return
the list of factors for a given feature.
'''
def get_feature_factors(self, idx):
return [idx]

'''
This method takes the string name of a factor and returns the offset of the
first feature of that factor, computed from the sizes of the preceding factors.
'''
def get_factor_base_feature(self, name):
offset = 0
for n, s in self.factors.items():
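
To make the FeatureBlock contract concrete, here is a minimal sketch of a hypothetical factorized block. The class name, factor names, and dimensions are invented for illustration; only the constructor and methods described in the docstring above are assumed.

    from collections import OrderedDict
    import chess

    from feature_block import FeatureBlock

    class ToyFactorizedFeatures(FeatureBlock):
        def __init__(self):
            # Main factor 'Toy' (128 real features) plus one virtual
            # factor 'ToyK' (8 features used only during training).
            super().__init__(
                'Toy^',                  # '^' marks a factorized block
                0x12345678,              # 32-bit feature-set hash
                OrderedDict([('Toy', 128), ('ToyK', 8)]))

        def get_active_features(self, board: chess.Board):
            # A real implementation derives the active real-feature
            # indices from the position; we return none for brevity.
            return []

        def get_feature_factors(self, idx):
            # Each real feature factors into itself plus one virtual one.
            k_idx = idx // 16
            return [idx, self.get_factor_base_feature('ToyK') + k_idx]

With these factors, num_real_features would be 128, num_virtual_features 8, and num_features 136.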
35 changes: 35 additions & 0 deletions feature_set.py
@@ -9,6 +9,14 @@ def _calculate_features_hash(features):
return features[0].hash ^ (tail_hash << 1) ^ (tail_hash >> 1) & 0xffffffff

class FeatureSet:
'''
A feature set is nothing more than a list of named FeatureBlocks.
It itself functions similarly to a feature block, but we don't make that
explicit because we don't want it to be used as a building block for other
feature sets. You can think of this class as a composite, but not to the full extent.
It is basically a concatenation of feature blocks.
'''

def __init__(self, features):
for feature in features:
if not isinstance(feature, FeatureBlock):
@@ -21,6 +29,12 @@ def __init__(self, features):
self.num_virtual_features = sum(feature.num_virtual_features for feature in features)
self.num_features = sum(feature.num_features for feature in features)

'''
This method returns the feature ranges for the virtual factors of the
underlying feature blocks. This is useful to know during initialization,
when we want to zero initialize the virtual feature weights, but give some other
values to the real feature weights.
'''
def get_virtual_feature_ranges(self):
ranges = []
offset = 0
@@ -31,6 +45,12 @@ def get_virtual_feature_ranges(self):

return ranges

'''
This method goes over all of the feature blocks and gathers the active features.
Each block has its own index space assigned, so features from two different
blocks will never have the same index here. Basically what you would expect
to happen after concatenating many feature blocks.
'''
def get_active_features(self, board):
w = []
b = []
@@ -44,6 +64,11 @@ def get_active_features(self, board):

return w, b

'''
This method takes a feature idx and looks for the block that owns it.
If it finds the block, it asks it to factorize the index; otherwise
it raises an Exception. The idx must refer to a real feature.
'''
def get_feature_factors(self, idx):
offset = 0
for feature in self.features:
@@ -53,6 +78,16 @@ def get_feature_factors(self, idx):

raise Exception('No feature block to factorize {}'.format(idx))

'''
This method does what get_feature_factors does, but for all
valid features at the same time. It returns a list of length
self.num_real_features, with the i-th element being the list of factors
of the i-th feature.
This method is technically redundant, but it allows the operation to be
performed slightly faster when there are many feature blocks. It might be
worth adding a similar method to FeatureBlock itself, to make it faster
for feature blocks with many factors.
'''
def get_virtual_to_real_features_gather_indices(self):
indices = []
real_offset = 0
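
As an illustration of what the gather indices look like, consider a toy setup with 4 real features that each factor into themselves plus one of 2 virtual features. All numbers here are invented; the sketch only assumes the get_feature_factors contract described above.

    # Real features 0..3; virtual features 4..5; real feature i
    # factors into [i, 4 + i % 2].
    def toy_factors(idx):
        return [idx, 4 + idx % 2]

    # One entry per real feature: all feature indices whose weights get
    # summed into it when virtual features are coalesced for .nnue.
    gather_indices = [toy_factors(i) for i in range(4)]
    print(gather_indices)  # [[0, 4], [1, 5], [2, 4], [3, 5]]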
7 changes: 7 additions & 0 deletions features.py
@@ -3,9 +3,16 @@

import argparse

'''
Each module that defines feature blocks must be imported here and
added to the _feature_modules list. Each such module must define a
function `get_feature_block_clss` at module scope that returns the list
of feature block classes in that module.
'''
import halfkp

_feature_modules = [halfkp]

_feature_blocks_by_name = dict()

def _add_feature_block(feature_block_cls):
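
For example, adding a hypothetical halfka module (the module name and classes here are invented; only the get_feature_block_clss convention described above is assumed) would look like:

    # halfka.py - a hypothetical new feature module
    from feature_block import FeatureBlock

    class Features(FeatureBlock):
        ...                      # the real feature block, e.g. 'HalfKA'

    class FactorizedFeatures(FeatureBlock):
        ...                      # the factorized variant, e.g. 'HalfKA^'

    # The discovery hook that features.py looks for at module scope:
    def get_feature_block_clss():
        return [Features, FactorizedFeatures]

and in features.py the module gets imported and appended, i.e. _feature_modules = [halfkp, halfka].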
3 changes: 3 additions & 0 deletions halfkp.py
@@ -45,5 +45,8 @@ def get_feature_factors(self, idx):

return [idx, self.get_factor_base_feature('HalfK') + k_idx, self.get_factor_base_feature('P') + p_idx]

'''
This is used by the features module for discovery of feature blocks.
'''
def get_feature_block_clss():
return [Features, FactorizedFeatures]
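
To make the HalfKP^ factorization concrete: each real HalfKP feature index encodes a (king square, piece-square plane) pair, and get_feature_factors splits it into the feature itself plus a HalfK and a P factor. Here is a rough sketch of the arithmetic, assuming the standard HalfKP dimensions (41024 HalfKP features, 64 HalfK, 641 P); treat the exact numbers as illustrative:

    NUM_PLANES = 641                 # piece-square planes per king square

    def halfkp_factors(idx, halfk_base, p_base):
        k_idx = idx // NUM_PLANES    # king square -> HalfK factor
        p_idx = idx % NUM_PLANES     # piece-square plane -> P factor
        return [idx, halfk_base + k_idx, p_base + p_idx]

    # With factors ordered HalfKP (41024), HalfK (64), P (641), the
    # bases from get_factor_base_feature are 41024 and 41088:
    print(halfkp_factors(1300, 41024, 41088))  # [1300, 41026, 41106]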
26 changes: 26 additions & 0 deletions model.py
@@ -30,12 +30,24 @@ def __init__(self, feature_set, lambda_=1.0):

self._zero_virtual_feature_weights()

'''
We zero all virtual feature weights because during serialization to .nnue
we compute the weights for each real feature as the sum of the weights of
the real feature in question and of the virtual features it factors into.
This means that if we didn't initialize the virtual feature weights to zero,
the real features would effectively have unexpected values at initialization,
spread out like a bell curve depending on how many factors there are.
'''
def _zero_virtual_feature_weights(self):
weights = self.input.weight
for a, b in self.feature_set.get_virtual_feature_ranges():
weights[a:b, :] = 0.0
self.input.weight = nn.Parameter(weights)

'''
This method attempts to convert the model from using self.feature_set
to using new_feature_set.
'''
def set_feature_set(self, new_feature_set):
if self.feature_set.name == new_feature_set.name:
return
@@ -45,8 +57,22 @@ def set_feature_set(self, new_feature_set):
if len(self.feature_set.features) > 1:
raise Exception('Cannot change feature set from {} to {}.'.format(self.feature_set.name, new_feature_set.name))

# Currently we only support conversion for feature sets with
# one feature block each, so we'll dig out the feature blocks directly
# and forget about the sets.
old_feature_block = self.feature_set.features[0]
new_feature_block = new_feature_set.features[0]

# next(iter(new_feature_block.factors)) is the way to get the
# first key in an OrderedDict (the OrderedDict being a str : int
# mapping from factor name to its size).
# That first factor is the main one. For example, when going from
# old_feature_block.name == "HalfKP" to new_feature_block.name == "HalfKP^",
# the first factor of "HalfKP^" is "HalfKP".
# We assume here that "^" denotes a factorized feature block and we would
# like feature block implementers to follow this convention.
# So if our current feature block matches the first factor of the new one,
# we only have to add the virtual features on top of the already existing
# real ones.
if old_feature_block.name == next(iter(new_feature_block.factors)):
# We can just extend with zeros since it's unfactorized -> factorized
weights = self.input.weight
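
For context, here is a sketch of the two weight transformations implied above. This is not the trainer's actual serialization code; it assumes a weight matrix laid out with one row per input feature (as the zeroing code above suggests) and the gather indices described in feature_set.py.

    import torch

    def extend_to_factorized(weights, num_virtual):
        # unfactorized -> factorized: append zero rows for the virtual
        # features, so the extended model computes the same outputs.
        zeros = torch.zeros(num_virtual, weights.shape[1], dtype=weights.dtype)
        return torch.cat([weights, zeros], dim=0)

    def coalesce_for_serialization(weights, gather_indices):
        # factorized -> .nnue: each real feature's serialized weights
        # are the sum of its own row and its virtual factors' rows.
        rows = [sum(weights[i] for i in factors) for factors in gather_indices]
        return torch.stack(rows)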
