Skip to content

Commit 50a3588

Browse files
lgeigerkpe
authored and committed
internal merge of PR tensorflow#1411
PiperOrigin-RevId: 231608988
1 parent 37f8e36 commit 50a3588

17 files changed

+27
-50
lines changed

tensor2tensor/data_generators/speech_recognition.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ def preprocess_example(self, example, mode, hparams):
122122
# This replaces CMVN estimation on data
123123
var_epsilon = 1e-09
124124
mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
125-
variance = tf.reduce_mean(tf.squared_difference(mel_fbanks, mean),
125+
variance = tf.reduce_mean(tf.square(mel_fbanks - mean),
126126
keepdims=True, axis=1)
127127
mel_fbanks = (mel_fbanks - mean) * tf.rsqrt(variance + var_epsilon)
128128

tensor2tensor/layers/common_hparams.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,8 +75,6 @@ def basic_params1():
7575
# Mixed precision training only supports exponential scaling currently
7676
# To disable the scaler, set to 0/False
7777
mixed_precision_optimizer_loss_scaler="exponential",
78-
# Determines the initial loss scaling value for mixed precision
79-
mixed_precision_optimizer_init_loss_scale=2**15,
8078
# Whether to zero gradients that were not computed, so that the
8179
# appropriate slots are created. Useful for sharing checkpoints between
8280
# models with different sets of heads.

tensor2tensor/layers/common_layers.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -256,7 +256,7 @@ def standardize_images(x):
256256
x = tf.to_float(tf.reshape(x, [-1] + x_shape[-3:]))
257257
x_mean = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
258258
x_variance = tf.reduce_mean(
259-
tf.squared_difference(x, x_mean), axis=[1, 2], keepdims=True)
259+
tf.square(x - x_mean), axis=[1, 2], keepdims=True)
260260
num_pixels = tf.to_float(x_shape[-2] * x_shape[-3])
261261
x = (x - x_mean) / tf.maximum(tf.sqrt(x_variance), tf.rsqrt(num_pixels))
262262
return tf.reshape(x, x_shape)
@@ -634,8 +634,7 @@ def layer_norm_compute(x, epsilon, scale, bias):
634634
"""Layer norm raw computation."""
635635
epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
636636
mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
637-
variance = tf.reduce_mean(
638-
tf.squared_difference(x, mean), axis=[-1], keepdims=True)
637+
variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keepdims=True)
639638
norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
640639
return norm_x * scale + bias
641640

@@ -691,8 +690,7 @@ def l2_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None):
691690
"l2_norm_bias", [filters], initializer=tf.zeros_initializer())
692691
epsilon, scale, bias = [cast_like(t, x) for t in [epsilon, scale, bias]]
693692
mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
694-
l2norm = tf.reduce_sum(
695-
tf.squared_difference(x, mean), axis=[-1], keepdims=True)
693+
l2norm = tf.reduce_sum(tf.square(x - mean), axis=[-1], keepdims=True)
696694
norm_x = (x - mean) * tf.rsqrt(l2norm + epsilon)
697695
return norm_x * scale + bias
698696

@@ -3348,7 +3346,7 @@ def get_sorted_projections(x):
33483346

33493347
proj1 = get_sorted_projections(logits1)
33503348
proj2 = get_sorted_projections(logits2)
3351-
dist = tf.reduce_mean(tf.squared_difference(proj1, proj2))
3349+
dist = tf.reduce_mean(tf.square(proj1 - proj2))
33523350
if return_logits:
33533351
return dist, logits1, logits2
33543352
return dist

tensor2tensor/layers/discretization.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -217,8 +217,8 @@ def embedding_lookup(x,
217217

218218
# Currently, we use the mean scaling for the commitment loss, as opposed to
219219
# summing across all non-batch dimensions.
220-
q_loss = tf.reduce_mean(tf.squared_difference(tf.stop_gradient(x), x_means))
221-
e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
220+
q_loss = tf.reduce_mean(tf.square((tf.stop_gradient(x) - x_means)))
221+
e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
222222
return x_means_hot, x_means, q_loss, e_loss, neg_q_entropy
223223

224224

@@ -469,8 +469,7 @@ def gumbel_softmax(x,
469469
# Add losses that prevent too few being used.
470470
distrib = tf.reshape(logsm, [-1, 2**z_size]) * maxvhot
471471
d_mean = tf.reduce_mean(distrib, axis=[0], keep_dims=True)
472-
d_variance = tf.reduce_mean(
473-
tf.squared_difference(distrib, d_mean), axis=[0])
472+
d_variance = tf.reduce_mean(tf.square(distrib - d_mean), axis=[0])
474473
d_dev = -tf.reduce_mean(d_variance)
475474
ret = s
476475

@@ -925,7 +924,7 @@ def vq_nearest_neighbor(x, means,
925924
x_means_hot = tf.one_hot(x_means_idx, bottleneck_size)
926925
x_means_hot_flat = tf.reshape(x_means_hot, [-1, bottleneck_size])
927926
x_means = tf.matmul(x_means_hot_flat, means)
928-
e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
927+
e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
929928
return x_means_hot, e_loss, dist
930929

931930

@@ -1334,8 +1333,7 @@ def gumbel_softmax_discrete_bottleneck(x,
13341333
x_means_assignments_flat = tf.reshape(x_means_assignments,
13351334
[-1, bottleneck_size])
13361335
x_means = tf.matmul(x_means_assignments_flat, means)
1337-
commitment_loss = tf.reduce_mean(
1338-
tf.squared_difference(x, tf.stop_gradient(x_means)))
1336+
commitment_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
13391337

13401338
# Update the ema variables.
13411339
updated_ema_count = moving_averages.assign_moving_average(

tensor2tensor/layers/modalities.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -736,8 +736,7 @@ class VideoModalityL2(VideoModalityL1):
736736
"""Modality for videos with L2 loss."""
737737

738738
def internal_loss(self, logits, targets):
739-
return tf.nn.relu(
740-
tf.squared_difference(logits, targets) - self.cutoff * self.cutoff)
739+
return tf.nn.relu((logits - targets)**2 - self.cutoff * self.cutoff)
741740

742741

743742
class VideoModalityL2Raw(VideoModalityL2):
@@ -917,7 +916,7 @@ def targets_bottom(self, x):
917916
return tf.to_float(x)
918917

919918
def loss(self, body_output, targets):
920-
loss = tf.squared_difference(body_output, tf.to_float(targets))
919+
loss = tf.square(body_output - tf.to_float(targets))
921920
return tf.reduce_mean(loss), tf.constant(1.0)
922921

923922

tensor2tensor/layers/vq_discrete.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,8 @@ def embedding_lookup(self, x, means):
138138
x_means_hot, [-1, self.hparams.num_blocks, self.hparams.block_v_size])
139139
x_means = tf.matmul(tf.transpose(x_means_hot_flat, perm=[1, 0, 2]), means)
140140
x_means = tf.transpose(x_means, [1, 0, 2])
141-
q_loss = tf.reduce_mean(
142-
tf.squared_difference(tf.stop_gradient(x), x_means))
143-
e_loss = tf.reduce_mean(
144-
tf.squared_difference(x, tf.stop_gradient(x_means)))
141+
q_loss = tf.reduce_mean(tf.square((tf.stop_gradient(x) - x_means)))
142+
e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2)
145143
return x_means_hot, x_means, q_loss, e_loss
146144

147145
def bit_to_int(self, x_bit, num_bits, base=2):

tensor2tensor/models/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@
3838
from tensor2tensor.models import revnet
3939
from tensor2tensor.models import shake_shake
4040
from tensor2tensor.models import slicenet
41-
from tensor2tensor.models import text_cnn
4241
from tensor2tensor.models import transformer
4342
from tensor2tensor.models import vanilla_gan
4443
from tensor2tensor.models import xception

tensor2tensor/models/research/autoencoders.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -219,8 +219,7 @@ def body(self, features):
219219
# minimized by just setting x=0 and b=0 -- so we don't want too much
220220
# of the influence of this, and we stop-gradient to not zero-out x.
221221
x_stop = tf.stop_gradient(x)
222-
xb_loss = tf.reduce_mean(tf.reduce_sum(
223-
tf.squared_difference(x_stop, b), axis=-1))
222+
xb_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x_stop - b), axis=-1))
224223
# To prevent this loss from exploding we clip at 1, but anneal clipping.
225224
clip_max = 1.0 / common_layers.inverse_exp_decay(
226225
warm_step, min_value=0.001)

tensor2tensor/models/research/transformer_nat.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ def vq_nearest_neighbor(x, hparams):
6565
x_means_idx = tf.argmax(-dist, axis=-1)
6666
x_means_hot = tf.one_hot(x_means_idx, depth=bottleneck_size)
6767
x_means = tf.matmul(x_means_hot, means)
68-
e_loss = tf.reduce_mean(tf.squared_difference(x, tf.stop_gradient(x_means)))
68+
e_loss = tf.reduce_mean(tf.square(x - tf.stop_gradient(x_means)))
6969
return x_means_hot, e_loss
7070

7171

tensor2tensor/models/research/transformer_vae.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -436,8 +436,7 @@ def ae_transformer_internal(inputs,
436436
losses["neg_q_entropy"] = neg_q_entropy * hparams.entropy_scale
437437
else:
438438
inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c")
439-
losses["latent_pred"] = tf.reduce_mean(
440-
tf.squared_difference(inputs_c, targets_c)) * 20
439+
losses["latent_pred"] = tf.reduce_mean((inputs_c - targets_c)**2) * 20
441440
def bn_inputs():
442441
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
443442
bn, _, _, _, _ = hparams.bottleneck(

0 commit comments

Comments
 (0)