diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml
index d5dec2eeb38..d2c0e84fecd 100644
--- a/source/isaaclab/config/extension.toml
+++ b/source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.47.0"
+version = "0.47.1"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst
index a10308a5a0f..2ed17e81557 100644
--- a/source/isaaclab/docs/CHANGELOG.rst
+++ b/source/isaaclab/docs/CHANGELOG.rst
@@ -1,6 +1,18 @@
 Changelog
 ---------
 
+0.47.1 (2025-10-15)
+~~~~~~~~~~~~~~~~~~~
+
+Fixed
+^^^^^
+
+* Fixed termination term bookkeeping in :class:`~isaaclab.managers.TerminationManager`: per-step
+  termination and last-episode termination bookkeeping are now tracked separately. Last-episode
+  dones are updated once per step from the outputs of all terms, avoiding per-term overwrites and
+  ensuring that the ``Episode_Termination`` metrics reflect the terms that actually triggered.
+
+
 0.47.0 (2025-10-14)
 ~~~~~~~~~~~~~~~~~~~
 
diff --git a/source/isaaclab/isaaclab/managers/termination_manager.py b/source/isaaclab/isaaclab/managers/termination_manager.py
index 2c732b46363..023cbc86696 100644
--- a/source/isaaclab/isaaclab/managers/termination_manager.py
+++ b/source/isaaclab/isaaclab/managers/termination_manager.py
@@ -63,6 +63,8 @@ def __init__(self, cfg: object, env: ManagerBasedRLEnv):
         self._term_name_to_term_idx = {name: i for i, name in enumerate(self._term_names)}
         # prepare extra info to store individual termination term information
         self._term_dones = torch.zeros((self.num_envs, len(self._term_names)), device=self.device, dtype=torch.bool)
+        # prepare buffer to store which termination terms ended each environment's last episode
+        self._last_episode_dones = torch.zeros_like(self._term_dones)
         # create buffer for managing termination per environment
         self._truncated_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
         self._terminated_buf = torch.zeros_like(self._truncated_buf)
@@ -138,7 +140,7 @@ def reset(self, env_ids: Sequence[int] | None = None) -> dict[str, torch.Tensor]
             env_ids = slice(None)
         # add to episode dict
         extras = {}
-        last_episode_done_stats = self._term_dones.float().mean(dim=0)
+        last_episode_done_stats = self._last_episode_dones.float().mean(dim=0)
         for i, key in enumerate(self._term_names):
             # store information
             extras["Episode_Termination/" + key] = last_episode_done_stats[i].item()
@@ -169,15 +171,17 @@ def compute(self) -> torch.Tensor:
             else:
                 self._terminated_buf |= value
-            # add to episode dones
-            rows = value.nonzero(as_tuple=True)[0]  # indexing is cheaper than boolean advance indexing
-            if rows.numel() > 0:
-                self._term_dones[rows] = False
-                self._term_dones[rows, i] = True
+            # record which terms fired at the current step
+            self._term_dones[:, i] = value
+        # update last-episode dones once per compute: for any env where at least one term fired,
+        # snapshot exactly which term(s) fired this step and clear the others
+        rows = self._term_dones.any(dim=1).nonzero(as_tuple=True)[0]
+        if rows.numel() > 0:
+            self._last_episode_dones[rows] = self._term_dones[rows]
         # return combined termination signal
         return self._truncated_buf | self._terminated_buf
 
     def get_term(self, name: str) -> torch.Tensor:
-        """Returns the termination term with the specified name.
+        """Returns the current-step value of the termination term with the specified name.
 
         Args:
             name: The name of the termination term.
@@ -190,7 +194,8 @@ def get_term(self, name: str) -> torch.Tensor:
     def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequence[float]]]:
         """Returns the active terms as iterable sequence of tuples.
 
-        The first element of the tuple is the name of the term and the second element is the raw value(s) of the term.
+        The first element of the tuple is the name of the term and the second element is the raw value(s) of the term
+        recorded at the current step.
 
         Args:
             env_idx: The specific environment to pull the active terms from.
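
For reviewers, here is a minimal standalone sketch of the bookkeeping pattern this patch introduces, kept separate from the diff itself. It uses plain PyTorch and hypothetical module-level names (`term_dones`, `last_episode_dones`, `compute_step`) standing in for the manager's private buffers and `compute()` method; it is an illustration, not part of the change.

```python
import torch

num_envs, num_terms = 4, 3
# per-step buffer: which terms fired at the *current* step
term_dones = torch.zeros((num_envs, num_terms), dtype=torch.bool)
# snapshot buffer: which terms ended each env's *last* episode
last_episode_dones = torch.zeros_like(term_dones)


def compute_step(term_values: list[torch.Tensor]) -> torch.Tensor:
    """term_values[i] is the boolean output of termination term i for all envs this step."""
    # record per-step outputs for every term (overwrites the previous step's values)
    for i, value in enumerate(term_values):
        term_dones[:, i] = value
    # for envs where at least one term fired, snapshot exactly which terms fired
    rows = term_dones.any(dim=1).nonzero(as_tuple=True)[0]
    if rows.numel() > 0:
        last_episode_dones[rows] = term_dones[rows]
    # combined done signal per env
    return term_dones.any(dim=1)


# step 1: term 0 fires in env 2
compute_step([
    torch.tensor([False, False, True, False]),
    torch.zeros(num_envs, dtype=torch.bool),
    torch.zeros(num_envs, dtype=torch.bool),
])
# step 2: nothing fires; last_episode_dones still remembers that term 0 ended env 2's episode
compute_step([torch.zeros(num_envs, dtype=torch.bool)] * 3)
print(last_episode_dones[2])  # tensor([ True, False, False])
```

Because the snapshot is taken once per step from all term outputs, a term evaluated later in the loop can no longer overwrite the row written by an earlier term, which is what previously skewed the `Episode_Termination/*` statistics.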