Skip to content

Commit bfbc881

Browse files
authored
Fix Regularization (google#21)
Regularization is now being applied. I set it conservatively to 1e-8 by default.
1 parent 180da33 commit bfbc881

File tree

3 files changed

+9
-6
lines changed

3 files changed

+9
-6
lines changed

frame_level_models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def create_model(self, model_input, vocab_size, num_frames, **unused_params):
 
     output = slim.fully_connected(
         avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
-        weights_regularizer=slim.l2_regularizer(0.01))
+        weights_regularizer=slim.l2_regularizer(1e-8))
     return {"predictions": output}
 
 class DBoFModel(models.BaseModel):

train.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
     "label_loss", "CrossEntropyLoss",
     "Which loss function to use for training the model.")
 flags.DEFINE_float(
-    "regularization_penalty", 1e-3,
+    "regularization_penalty", 1,
     "How much weight to give to the regularization loss (the label loss has "
     "a weight of 1).")
 flags.DEFINE_float("base_learning_rate", 0.01,
@@ -172,7 +172,7 @@ def build_graph(reader,
                 batch_size=1000,
                 base_learning_rate=0.01,
                 optimizer_class=tf.train.AdamOptimizer,
-                regularization_penalty=1e-3,
+                regularization_penalty=1,
                 num_readers=1,
                 num_epochs=None):
  """Creates the Tensorflow graph.
@@ -234,6 +234,9 @@ def build_graph(reader,
      reg_loss = result["regularization_loss"]
    else:
      reg_loss = tf.constant(0.0)
+    reg_losses = tf.losses.get_regularization_losses()
+    if reg_losses:
+      reg_loss += tf.add_n(reg_losses)
    if regularization_penalty != 0:
      tf.summary.scalar("reg_loss", reg_loss)

video_level_models.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
 class LogisticModel(models.BaseModel):
   """Logistic model with L2 regularization."""
 
-  def create_model(self, model_input, vocab_size, **unused_params):
+  def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
     """Creates a logistic model.
 
     Args:
@@ -43,7 +43,7 @@ def create_model(self, model_input, vocab_size, **unused_params):
       batch_size x num_classes."""
     output = slim.fully_connected(
         model_input, vocab_size, activation_fn=tf.nn.sigmoid,
-        weights_regularizer=slim.l2_regularizer(0.01))
+        weights_regularizer=slim.l2_regularizer(l2_penalty))
     return {"predictions": output}
 
 class MoeModel(models.BaseModel):
@@ -53,7 +53,7 @@ def create_model(self,
                    model_input,
                    vocab_size,
                    num_mixtures=None,
-                   l2_penalty=1e-5,
+                   l2_penalty=1e-8,
                    **unused_params):
     """Creates a Mixture of (Logistic) Experts model.
 

0 commit comments

Comments
 (0)