@@ -451,8 +451,8 @@ def feed_forward_gaussian_fun(action_space, config, observations):
   if not isinstance(action_space, gym.spaces.box.Box):
     raise ValueError("Expecting continuous action space.")
 
-  mean_weights_initializer = tf.initializers.variance_scaling(
-      scale=config.init_mean_factor)
+  mean_weights_initializer = tf.contrib.layers.variance_scaling_initializer(
+      factor=config.init_mean_factor)
   logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
 
   flat_observations = tf.reshape(observations, [
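Note on the hunk above: the initializer swap is not a pure rename. `tf.contrib.layers.variance_scaling_initializer` takes the scaling constant as `factor` (defaulting to a fan-in-scaled truncated normal), while `tf.initializers.variance_scaling` calls it `scale`. A minimal sketch of the correspondence, assuming a TF 1.x runtime where `tf.contrib` is still available; shapes are made up for illustration:

```python
import numpy as np
import tensorflow as tf

fan_in, fan_out = 256, 64
contrib_init = tf.contrib.layers.variance_scaling_initializer(factor=0.1)
core_init = tf.initializers.variance_scaling(scale=0.1)

w_contrib = contrib_init([fan_in, fan_out])
w_core = core_init([fan_in, fan_out])

with tf.Session() as sess:
  a, b = sess.run([w_contrib, w_core])
  # Both draw fan-in-scaled weights; the truncation constants differ slightly,
  # so the empirical standard deviations are close but not identical.
  print(np.std(a), np.std(b))
```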
@@ -463,10 +463,10 @@ def feed_forward_gaussian_fun(action_space, config, observations):
   with tf.variable_scope("policy"):
     x = flat_observations
     for size in config.policy_layers:
-      x = tf.layers.dense(x, size, activation=tf.nn.relu)
-    mean = tf.layers.dense(
-        x, action_space.shape[0], activation=tf.tanh,
-        kernel_initializer=mean_weights_initializer)
+      x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
+    mean = tf.contrib.layers.fully_connected(
+        x, action_space.shape[0], tf.tanh,
+        weights_initializer=mean_weights_initializer)
     logstd = tf.get_variable(
         "logstd", mean.shape[2:], tf.float32, logstd_initializer)
     logstd = tf.tile(
@@ -475,8 +475,8 @@ def feed_forward_gaussian_fun(action_space, config, observations):
   with tf.variable_scope("value"):
     x = flat_observations
     for size in config.value_layers:
-      x = tf.layers.dense(x, size, activation=tf.nn.relu)
-    value = tf.layers.dense(x, 1)[..., 0]
+      x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
+    value = tf.contrib.layers.fully_connected(x, 1, None)[..., 0]
   mean = tf.check_numerics(mean, "mean")
   logstd = tf.check_numerics(logstd, "logstd")
   value = tf.check_numerics(value, "value")
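The two hunks above follow the same argument mapping: `tf.contrib.layers.fully_connected` takes the activation as its third positional argument (`activation_fn`), which defaults to `tf.nn.relu`, so a linear head such as the value output has to pass `None` explicitly, and the initializer keyword is `weights_initializer` rather than `kernel_initializer`. A hedged side-by-side sketch, again assuming TF 1.x with `tf.contrib`; the tensors are stand-ins for the ones in the diff:

```python
import tensorflow as tf

x = tf.zeros([8, 32])  # stand-in for the flattened observations
init = tf.contrib.layers.variance_scaling_initializer(factor=0.1)

# tf.layers.dense: activation defaults to None (linear); kernel_initializer kwarg.
mean_core = tf.layers.dense(x, 4, activation=tf.tanh, kernel_initializer=init)
value_core = tf.layers.dense(x, 1)

# tf.contrib.layers.fully_connected: activation_fn is the third positional
# argument and defaults to tf.nn.relu, so linear outputs need an explicit None;
# the initializer kwarg is weights_initializer.
mean_contrib = tf.contrib.layers.fully_connected(
    x, 4, tf.tanh, weights_initializer=init)
value_contrib = tf.contrib.layers.fully_connected(x, 1, None)
```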
@@ -505,14 +505,16 @@ def body(self, features):
     with tf.variable_scope("policy"):
       x = flat_observations
       for size in self.hparams.policy_layers:
-        x = tf.layers.dense(x, size, activation=tf.nn.relu)
-      logits = tf.layers.dense(x, self.hparams.problem.num_actions)
+        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
+      logits = tf.contrib.layers.fully_connected(
+          x, self.hparams.problem.num_actions, activation_fn=None
+      )
       logits = tf.expand_dims(logits, axis=1)
     with tf.variable_scope("value"):
       x = flat_observations
       for size in self.hparams.value_layers:
-        x = tf.layers.dense(x, size, activation=tf.nn.relu)
-      value = tf.layers.dense(x, 1)
+        x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
+      value = tf.contrib.layers.fully_connected(x, 1, None)
     logits = clip_logits(logits, self.hparams)
     return {"target_policy": logits, "target_value": value}
 
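One side effect worth flagging: the two APIs create differently named variables under the enclosing scope (`.../dense/kernel` and `.../dense/bias` versus `.../fully_connected/weights` and `.../fully_connected/biases`), so checkpoints written before this change are unlikely to restore afterwards. A small sketch to inspect the names, assuming TF 1.x with `tf.contrib`:

```python
import tensorflow as tf

x = tf.zeros([4, 16])
with tf.variable_scope("policy"):
  tf.layers.dense(x, 8, activation=tf.nn.relu)         # -> policy/dense/{kernel,bias}
  tf.contrib.layers.fully_connected(x, 8, tf.nn.relu)  # -> policy/fully_connected/{weights,biases}

for v in tf.global_variables():
  print(v.name)
```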
@@ -529,22 +531,23 @@ def body(self, features):
     dropout = getattr(self.hparams, "dropout_ppo", 0.0)
     with tf.variable_scope("feed_forward_cnn_small"):
       x = tf.cast(x, tf.float32) / 255.0
-      x = tf.layers.conv2d(x, 32, (5, 5), strides=(2, 2),
-                           activation=tf.nn.relu, padding="same")
-      x = tf.layers.conv2d(x, 32, (5, 5), strides=(2, 2),
-                           activation=tf.nn.relu, padding="same")
+      x = tf.contrib.layers.conv2d(x, 32, [5, 5], [2, 2],
+                                   activation_fn=tf.nn.relu, padding="SAME")
+      x = tf.contrib.layers.conv2d(x, 32, [5, 5], [2, 2],
+                                   activation_fn=tf.nn.relu, padding="SAME")
 
       flat_x = tf.layers.flatten(x)
       flat_x = tf.layers.dropout(flat_x, rate=dropout)
-      x = tf.layers.dense(flat_x, 128, activation=tf.nn.relu)
+      x = tf.contrib.layers.fully_connected(flat_x, 128, tf.nn.relu)
 
       logits = tf.layers.dense(
           x, self.hparams.problem.num_actions, name="dense2"
       )
       logits = clip_logits(logits, self.hparams)
       logits = tf.expand_dims(logits, axis=1)
 
-      value = tf.layers.dense(x, 1)
+      value = tf.contrib.layers.fully_connected(
+          x, 1, activation_fn=None)
       return {"target_policy": logits, "target_value": value}
 
 
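The convolution swap follows the same pattern, with the stride as the fourth positional argument and `padding`/`activation_fn` defaulting to `"SAME"`/`tf.nn.relu` in the contrib version; note the hunk keeps `tf.layers.flatten`, `tf.layers.dropout`, and the `"dense2"` logits layer on the core API. A hedged sketch of the mapping, with a made-up input shape:

```python
import tensorflow as tf

x = tf.zeros([1, 84, 84, 4])  # made-up NHWC frame stack

y_core = tf.layers.conv2d(x, 32, (5, 5), strides=(2, 2),
                          activation=tf.nn.relu, padding="same")
y_contrib = tf.contrib.layers.conv2d(x, 32, [5, 5], [2, 2],
                                     activation_fn=tf.nn.relu, padding="SAME")

print(y_core.shape, y_contrib.shape)  # both (1, 42, 42, 32)
```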
@@ -597,12 +600,15 @@ def body(self, features):
     with tf.variable_scope("dense_bitwise"):
       flat_x = discretization.int_to_bit_embed(flat_x, 8, 32)
 
-      x = tf.layers.dense(flat_x, 256, activation=tf.nn.relu)
-      x = tf.layers.dense(flat_x, 128, activation=tf.nn.relu)
+      x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
+      x = tf.contrib.layers.fully_connected(flat_x, 128, tf.nn.relu)
 
-      logits = tf.layers.dense(x, self.hparams.problem.num_actions)
+      logits = tf.contrib.layers.fully_connected(
+          x, self.hparams.problem.num_actions, activation_fn=None
+      )
 
-      value = tf.layers.dense(x, 1)[..., 0]
+      value = tf.contrib.layers.fully_connected(
+          x, 1, activation_fn=None)[..., 0]
 
       return {"target_policy": logits, "target_value": value}
 
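As in the earlier hunks, the value head passes `activation_fn=None` to stay linear, and `[..., 0]` drops the trailing unit dimension to leave one scalar per example. Both the old and new versions feed `flat_x`, not `x`, into the 128-unit layer, so the 256-unit layer's output appears unused; that is carried over unchanged from the source commit. A minimal sketch of the value-head shape, with a made-up rank-2 input:

```python
import tensorflow as tf

x = tf.zeros([4, 128])  # made-up (batch, features) input
value = tf.contrib.layers.fully_connected(x, 1, activation_fn=None)[..., 0]
print(value.shape)  # (4,): one scalar value estimate per example
```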