From c7b9ebd1391cffe32687b4fc06db6be613ca6224 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Wed, 6 Aug 2025 22:18:22 -0700 Subject: [PATCH 1/6] fix the per-step termination log on reset to per-episode termination log --- .../isaaclab/managers/termination_manager.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py index 1f88f5f6454..71b4868ea7e 100644 --- a/source/isaaclab/isaaclab/managers/termination_manager.py +++ b/source/isaaclab/isaaclab/managers/termination_manager.py @@ -61,9 +61,7 @@ def __init__(self, cfg: object, env: ManagerBasedRLEnv): # call the base class constructor (this will parse the terms config) super().__init__(cfg, env) # prepare extra info to store individual termination term information - self._term_dones = dict() - for term_name in self._term_names: - self._term_dones[term_name] = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) + self._term_dones = torch.zeros((self.num_envs, len(self._term_names)), device=self.device, dtype=torch.bool) # create buffer for managing termination per environment self._truncated_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) self._terminated_buf = torch.zeros_like(self._truncated_buf) @@ -139,9 +137,10 @@ def reset(self, env_ids: Sequence[int] | None = None) -> dict[str, torch.Tensor] env_ids = slice(None) # add to episode dict extras = {} - for key in self._term_dones.keys(): + last_episode_done_stats = self._term_dones.float().mean(dim=0) + for i, key in enumerate(self._term_names): # store information - extras["Episode_Termination/" + key] = torch.count_nonzero(self._term_dones[key][env_ids]).item() + extras["Episode_Termination/" + key] = last_episode_done_stats[i].item() # reset all the reward terms for term_cfg in self._class_term_cfgs: term_cfg.func.reset(env_ids=env_ids) @@ -161,7 +160,7 @@ def compute(self) 
-> torch.Tensor: self._truncated_buf[:] = False self._terminated_buf[:] = False # iterate over all the termination terms - for name, term_cfg in zip(self._term_names, self._term_cfgs): + for i, term_cfg in enumerate(self._term_cfgs): value = term_cfg.func(self._env, **term_cfg.params) # store timeout signal separately if term_cfg.time_out: @@ -169,7 +168,8 @@ def compute(self) -> torch.Tensor: else: self._terminated_buf |= value # add to episode dones - self._term_dones[name][:] = value + self._term_dones[value] = False + self._term_dones[value, i] = True # return combined termination signal return self._truncated_buf | self._terminated_buf @@ -182,7 +182,7 @@ def get_term(self, name: str) -> torch.Tensor: Returns: The corresponding termination term value. Shape is (num_envs,). """ - return self._term_dones[name] + return self._term_dones[name, self._term_names.index(name)] def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequence[float]]]: """Returns the active terms as iterable sequence of tuples. @@ -196,8 +196,8 @@ def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequenc The active terms. 
""" terms = [] - for key in self._term_dones.keys(): - terms.append((key, [self._term_dones[key][env_idx].float().cpu().item()])) + for i, key in enumerate(self._term_names): + terms.append((key, [self._term_dones[env_idx, i].float().cpu().item()])) return terms """ From fc8c511bafeb9e9b95f7aff0794c6d743abce7dd Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Wed, 6 Aug 2025 22:29:08 -0700 Subject: [PATCH 2/6] update change log --- source/isaaclab/config/extension.toml | 2 +- source/isaaclab/docs/CHANGELOG.rst | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml index 13164255182..02fd7e53082 100644 --- a/source/isaaclab/config/extension.toml +++ b/source/isaaclab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.44.9" +version = "0.44.10" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst index 0592ae37c0d..e248d8d8360 100644 --- a/source/isaaclab/docs/CHANGELOG.rst +++ b/source/isaaclab/docs/CHANGELOG.rst @@ -1,6 +1,17 @@ Changelog --------- +0.44.10 (2025-08-06) +~~~~~~~~~~~~~~~~~~~~ + +Fixed +^^^^^ + +* The ``term_done`` logging in :class:`~isaaclab.managers.TerminationManager` previously reported the instantaneous +  term-done count at reset. This led to inaccurate aggregation of termination counts, obscuring what was really happening +  during training. Instead, we now log the episodic term-done statistics.
+ + 0.44.9 (2025-07-30) ~~~~~~~~~~~~~~~~~~~ From 842cea03946e3d8bbd207ce6b113c08a7decf9e3 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Sun, 10 Aug 2025 15:28:24 -0700 Subject: [PATCH 3/6] make name indexing O(1) operation --- source/isaaclab/isaaclab/managers/termination_manager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py index 71b4868ea7e..0babab394d7 100644 --- a/source/isaaclab/isaaclab/managers/termination_manager.py +++ b/source/isaaclab/isaaclab/managers/termination_manager.py @@ -60,6 +60,7 @@ def __init__(self, cfg: object, env: ManagerBasedRLEnv): # call the base class constructor (this will parse the terms config) super().__init__(cfg, env) + self._term_name_to_term_idx = {name: i for i, name in enumerate(self._term_names)} # prepare extra info to store individual termination term information self._term_dones = torch.zeros((self.num_envs, len(self._term_names)), device=self.device, dtype=torch.bool) # create buffer for managing termination per environment @@ -182,7 +183,7 @@ def get_term(self, name: str) -> torch.Tensor: Returns: The corresponding termination term value. Shape is (num_envs,). """ - return self._term_dones[name, self._term_names.index(name)] + return self._term_dones[:, self._term_name_to_term_idx[name]] def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequence[float]]]: """Returns the active terms as iterable sequence of tuples. @@ -217,7 +218,7 @@ def set_term_cfg(self, term_name: str, cfg: TerminationTermCfg): if term_name not in self._term_names: raise ValueError(f"Termination term '{term_name}' not found.") # set the configuration - self._term_cfgs[self._term_names.index(term_name)] = cfg + self._term_cfgs[self._term_name_to_term_idx[term_name]] = cfg def get_term_cfg(self, term_name: str) -> TerminationTermCfg: """Gets the configuration for the specified term. 
@@ -234,7 +235,7 @@ def get_term_cfg(self, term_name: str) -> TerminationTermCfg: if term_name not in self._term_names: raise ValueError(f"Termination term '{term_name}' not found.") # return the configuration - return self._term_cfgs[self._term_names.index(term_name)] + return self._term_cfgs[self._term_name_to_term_idx[term_name]] """ Helper functions. From 7ef71ccb4138278c2886e033861d5dbb0e6bc12c Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Tue, 12 Aug 2025 17:08:30 -0700 Subject: [PATCH 4/6] make performance a bit faster --- source/isaaclab/isaaclab/managers/termination_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py index 0babab394d7..1e06ba51123 100644 --- a/source/isaaclab/isaaclab/managers/termination_manager.py +++ b/source/isaaclab/isaaclab/managers/termination_manager.py @@ -169,8 +169,10 @@ def compute(self) -> torch.Tensor: else: self._terminated_buf |= value # add to episode dones - self._term_dones[value] = False - self._term_dones[value, i] = True + rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing + if rows.numel() > 0: + self._term_dones.index_fill_(dim=0, index=rows, value=False) + self._term_dones[rows, i] = True # return combined termination signal return self._truncated_buf | self._terminated_buf From 183b4aaa44fd9ae642f5a93fd15cc36df2516f44 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Tue, 12 Aug 2025 17:14:13 -0700 Subject: [PATCH 5/6] make performance a bit faster --- source/isaaclab/isaaclab/managers/termination_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py index 1e06ba51123..7e35ec596a5 100644 --- a/source/isaaclab/isaaclab/managers/termination_manager.py +++ 
b/source/isaaclab/isaaclab/managers/termination_manager.py @@ -171,7 +171,7 @@ def compute(self) -> torch.Tensor: # add to episode dones rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing if rows.numel() > 0: - self._term_dones.index_fill_(dim=0, index=rows, value=False) + self._term_dones[rows] = False self._term_dones[rows, i] = True # return combined termination signal return self._truncated_buf | self._terminated_buf From a0767c60fb9b633d0fb3a6f139aa02be7a38f37d Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Wed, 13 Aug 2025 11:30:58 -0700 Subject: [PATCH 6/6] pass precommit --- source/isaaclab/isaaclab/managers/termination_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py index 7e35ec596a5..2c732b46363 100644 --- a/source/isaaclab/isaaclab/managers/termination_manager.py +++ b/source/isaaclab/isaaclab/managers/termination_manager.py @@ -169,7 +169,7 @@ def compute(self) -> torch.Tensor: else: self._terminated_buf |= value # add to episode dones - rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing + rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing if rows.numel() > 0: self._term_dones[rows] = False self._term_dones[rows, i] = True