diff --git a/python/ray/rllib/ppo/loss.py b/python/ray/rllib/ppo/loss.py index f57502d18b40..14f4083031ed 100644 --- a/python/ray/rllib/ppo/loss.py +++ b/python/ray/rllib/ppo/loss.py @@ -60,7 +60,7 @@ def __init__( self.value_function - prev_vf_preds, -config["clip_param"], config["clip_param"]) self.vf_loss2 = tf.square(vf_clipped - value_targets) - self.vf_loss = tf.minimum(self.vf_loss1, self.vf_loss2) + self.vf_loss = tf.maximum(self.vf_loss1, self.vf_loss2) self.mean_vf_loss = tf.reduce_mean(self.vf_loss) self.loss = tf.reduce_mean( -self.surr + kl_coeff * self.kl +