python/ray/rllib/agents/ddpg/ddpg_policy_graph.py

-Original file line number
+Diff line change
@@ Expand Up @@
                             stddev=self.config["target_noise"]),
                         -target_noise_clip, target_noise_clip)
                     policy_tp1_smoothed = tf.clip_by_value(
-                        policy_tp1 + clipped_normal_sample, action_space.low,
-                        action_space.high)
+                        policy_tp1 + clipped_normal_sample,
+                        action_space.low * tf.ones_like(policy_tp1),
+                        action_space.high * tf.ones_like(policy_tp1))
                 else:
                     # no smoothing, just use deterministic actions
                     policy_tp1_smoothed = policy_tp1
@@ -468,8 +469,9 @@ def make_noisy_actions():
                             tf.shape(deterministic_actions),
                             stddev=self.config["exploration_gaussian_sigma"])
                         stochastic_actions = tf.clip_by_value(
-                            deterministic_actions + normal_sample, action_low,
-                            action_high)
+                            deterministic_actions + normal_sample,
+                            action_low * tf.ones_like(deterministic_actions),
+                            action_high * tf.ones_like(deterministic_actions))
                     elif noise_type == "ou":
                         # add OU noise for exploration, DDPG-style
                         zero_acts = action_low.size * [.0]
@@ -489,7 +491,9 @@ def make_noisy_actions():
                         noise = noise_scale * base_scale \
                             * exploration_value * action_range
                         stochastic_actions = tf.clip_by_value(
-                            deterministic_actions + noise, action_low, action_high)
+                            deterministic_actions + noise,
+                            action_low * tf.ones_like(deterministic_actions),
+                            action_high * tf.ones_like(deterministic_actions))
                     else:
                         raise ValueError(
                             "Unknown noise type '%s' (try 'ou' or 'gaussian')" %
@@ Expand Down @@

python/ray/rllib/utils/tf_run_builder.py

-Original file line number
+Diff line change
@@ -48,6 +48,8 @@ def get(self, to_fetch):
                         self.session, self.fetches, self.debug_name,
                         self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
                 except Exception:
+                    logger.exception("Error fetching: {}, feed_dict={}".format(
+                        self.fetches, self.feed_dict))
                     raise ValueError("Error fetching: {}, feed_dict={}".format(
                         self.fetches, self.feed_dict))
             if isinstance(to_fetch, int):
@@ Expand Down @@

[rllib] Fix tf.clip_by_value issue after TensorFlow upgrade #4697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

ericl merged 3 commits into ray-project:master from alibaba-archive:dev_fix_clip_by_value

May 13, 2019

-Original file line number
+Diff line change
@@ Expand Up @@
                             stddev=self.config["target_noise"]),
                         -target_noise_clip, target_noise_clip)
                     policy_tp1_smoothed = tf.clip_by_value(
-                        policy_tp1 + clipped_normal_sample, action_space.low,
-                        action_space.high)
+                        policy_tp1 + clipped_normal_sample,
+                        action_space.low * tf.ones_like(policy_tp1),
+                        action_space.high * tf.ones_like(policy_tp1))
                 else:
                     # no smoothing, just use deterministic actions
                     policy_tp1_smoothed = policy_tp1
@@ -468,8 +469,9 @@ def make_noisy_actions():
                             tf.shape(deterministic_actions),
                             stddev=self.config["exploration_gaussian_sigma"])
                         stochastic_actions = tf.clip_by_value(
-                            deterministic_actions + normal_sample, action_low,
-                            action_high)
+                            deterministic_actions + normal_sample,
+                            action_low * tf.ones_like(deterministic_actions),
+                            action_high * tf.ones_like(deterministic_actions))
                     elif noise_type == "ou":
                         # add OU noise for exploration, DDPG-style
                         zero_acts = action_low.size * [.0]
@@ -489,7 +491,9 @@ def make_noisy_actions():
                         noise = noise_scale * base_scale \
                             * exploration_value * action_range
                         stochastic_actions = tf.clip_by_value(
-                            deterministic_actions + noise, action_low, action_high)
+                            deterministic_actions + noise,
+                            action_low * tf.ones_like(deterministic_actions),
+                            action_high * tf.ones_like(deterministic_actions))
                     else:
                         raise ValueError(
                             "Unknown noise type '%s' (try 'ou' or 'gaussian')" %
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -48,6 +48,8 @@ def get(self, to_fetch):
                         self.session, self.fetches, self.debug_name,
                         self.feed_dict, os.environ.get("TF_TIMELINE_DIR"))
                 except Exception:
+                    logger.exception("Error fetching: {}, feed_dict={}".format(
+                        self.fetches, self.feed_dict))
                     raise ValueError("Error fetching: {}, feed_dict={}".format(
                         self.fetches, self.feed_dict))
             if isinstance(to_fetch, int):
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[rllib] Fix tf.clip_by_value issue after TensorFlow upgrade #4697

Uh oh!

Diff view

Diff view

There are no files selected for viewing