From d1c4d521cbf283bd97b73dab823800bc43a828b3 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Fri, 19 Sep 2025 16:00:30 +0200 Subject: [PATCH 1/3] Move new restart message into EnvRunner --- rllib/env/env_runner.py | 4 +--- rllib/env/multi_agent_env_runner.py | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/rllib/env/env_runner.py b/rllib/env/env_runner.py index 04191cf28900..6b73c710d95d 100644 --- a/rllib/env/env_runner.py +++ b/rllib/env/env_runner.py @@ -236,12 +236,10 @@ def _try_env_step(self, actions): except Exception as e: self.metrics.log_value(NUM_ENV_STEP_FAILURES_LIFETIME, 1, reduce="sum") - # @OldAPIStack (config.restart_failed_sub_environments) if self.config.restart_failed_sub_environments: if not isinstance(e, StepFailedRecreateEnvError): logger.exception( - "Stepping the env resulted in an error! The original error " - f"is: {e}" + f"RLlib {self.__class__.__name__}: Environment step failed. Will force reset env(s) in this EnvRunner. The original error is: {e}" ) # Recreate the env. self.make_env() diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index f1c5922eab8c..0167e55a0b8d 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -363,9 +363,6 @@ def _sample( # Try stepping the environment. results = self._try_env_step(actions_for_env) if results == ENV_STEP_FAILURE: - logging.warning( - f"RLlib {self.__class__.__name__}: Environment step failed. Will force reset env(s) in this EnvRunner." - ) return self._sample( num_timesteps=num_timesteps, num_episodes=num_episodes, From 892570cfcc2af7dbe3d7192920fecf8e029ee0f8 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Wed, 24 Sep 2025 10:48:44 +0200 Subject: [PATCH 2/3] Kamil's comment --- rllib/env/env_runner.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rllib/env/env_runner.py b/rllib/env/env_runner.py index 6b73c710d95d..55189f78b2ef 100644 --- a/rllib/env/env_runner.py +++ b/rllib/env/env_runner.py @@ -248,11 +248,20 @@ def _try_env_step(self, actions): # data and repeating the step attempt). return ENV_STEP_FAILURE else: + logger.exception( + f"RLlib {self.__class__.__name__}: Environment step failed and " + "'config.restart_failed_sub_environments' is False. " + "This env will not be recreated. " + "Consider setting 'fault_tolerance(restart_failed_sub_environments=True)' in your AlgorithmConfig " + "in order to automatically re-create and force-reset an env." + f"The original error type: {type(e)}. " + f"{e}" + ) if isinstance(e, StepFailedRecreateEnvError): - raise ValueError( + raise RuntimeError( "Environment raised StepFailedRecreateEnvError but config.restart_failed_sub_environments is False." ) from e - raise e + raise RuntimeError from e def _convert_to_tensor(self, struct) -> TensorType: """Converts structs to a framework-specific tensor.""" From f00238995f9af9f7f0466f703340747d25918fa7 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Wed, 24 Sep 2025 22:14:19 +0200 Subject: [PATCH 3/3] kamils comment --- rllib/env/env_runner.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rllib/env/env_runner.py b/rllib/env/env_runner.py index 55189f78b2ef..95588a7e8906 100644 --- a/rllib/env/env_runner.py +++ b/rllib/env/env_runner.py @@ -257,10 +257,6 @@ def _try_env_step(self, actions): f"The original error type: {type(e)}. " f"{e}" ) - if isinstance(e, StepFailedRecreateEnvError): - raise RuntimeError( - "Environment raised StepFailedRecreateEnvError but config.restart_failed_sub_environments is False." - ) from e raise RuntimeError from e def _convert_to_tensor(self, struct) -> TensorType: