diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py
index 03e88bd1fc5e..14b058488b7e 100644
--- a/python/ray/rllib/models/action_dist.py
+++ b/python/ray/rllib/models/action_dist.py
@@ -43,21 +43,21 @@ def logp(self, x):
 
     def entropy(self):
         a0 = self.inputs - tf.reduce_max(self.inputs, reduction_indices=[1],
-                                         keep_dims=True)
+                                         keepdims=True)
         ea0 = tf.exp(a0)
-        z0 = tf.reduce_sum(ea0, reduction_indices=[1], keep_dims=True)
+        z0 = tf.reduce_sum(ea0, reduction_indices=[1], keepdims=True)
         p0 = ea0 / z0
         return tf.reduce_sum(p0 * (tf.log(z0) - a0), reduction_indices=[1])
 
     def kl(self, other):
         a0 = self.inputs - tf.reduce_max(self.inputs, reduction_indices=[1],
-                                         keep_dims=True)
+                                         keepdims=True)
         a1 = other.inputs - tf.reduce_max(other.inputs, reduction_indices=[1],
-                                          keep_dims=True)
+                                          keepdims=True)
         ea0 = tf.exp(a0)
         ea1 = tf.exp(a1)
-        z0 = tf.reduce_sum(ea0, reduction_indices=[1], keep_dims=True)
-        z1 = tf.reduce_sum(ea1, reduction_indices=[1], keep_dims=True)
+        z0 = tf.reduce_sum(ea0, reduction_indices=[1], keepdims=True)
+        z1 = tf.reduce_sum(ea1, reduction_indices=[1], keepdims=True)
         p0 = ea0 / z0
         return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)),
                              reduction_indices=[1])