Skip to content

Commit f498245

Browse files
ooctipus and kellyguo11 authored
Separates per-step termination and last-episode termination bookkeeping (#3745)
# Description This PR fixes the issue where get_done_term returned last episode value rather than current step value. This PR realizes values used for get_term should be different from that used for logging, and mixed usage leads to non-intuitive behavior. Using per-step value for logging leads to overcounting and undercounting reported in #2977; using last-episode value for get_term leads to misalignment with expectation reported in #3720 Fixes #2977 #3720 --- The logging behavior remains *mostly* the same as #3107, and also got rid of the weird overwriting behavior (yay). I get exactly the same termination curve as #3107 when run on `Isaac-Velocity-Rough-Anymal-C-v0` Here is a benchmark summary with 1000 steps running `Isaac-Velocity-Rough-Anymal-C-v0` with 4096 envs Before #3107: `| termination.compute | 0.229 ms|` `| termination.reset | 0.007 ms|` PR #3107: `| termination.compute | 0.274 ms|` `| termination.reset | 0.004 ms|` This PR: `| termination.compute | 0.258 ms|` `| termination.reset | 0.004 ms|` We actually see improvement, this is due to the fact that expensive maintenance of last_episode_value is only computed once per compute (#3107 computes last_episode_value for every term) ## Type of change - Bug fix (non-breaking change which fixes an issue) ## Checklist - [x] I have read and understood the [contribution guidelines](https://isaac-sim.github.io/IsaacLab/main/source/refs/contributing.html) - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there --------- Signed-off-by: Kelly Guo <[email protected]> Co-authored-by: Kelly Guo 
<[email protected]>
1 parent c8b6d22 commit f498245

File tree

4 files changed

+165
-8
lines changed

4 files changed

+165
-8
lines changed

source/isaaclab/config/extension.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
# Note: Semantic Versioning is used: https://semver.org/
4-
version = "0.47.8"
4+
version = "0.47.9"
55

66
# Description
77
title = "Isaac Lab framework for Robot Learning"

source/isaaclab/docs/CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
Changelog
22
---------
33

4+
0.47.9 (2025-11-06)
5+
~~~~~~~~~~~~~~~~~~~
6+
7+
Changed
8+
^^^^^^^
9+
10+
* Fixed termination term bookkeeping in :class:`~isaaclab.managers.TerminationManager`:
11+
per-step termination and last-episode termination bookkeeping are now separated.
12+
Last-episode dones are now updated once per step from all term outputs, avoiding per-term overwrites
13+
and ensuring Episode_Termination metrics reflect the actual triggering terms.
14+
15+
416
0.47.8 (2025-11-06)
517
~~~~~~~~~~~~~~~~~~~
618

source/isaaclab/isaaclab/managers/termination_manager.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ def __init__(self, cfg: object, env: ManagerBasedRLEnv):
6363
self._term_name_to_term_idx = {name: i for i, name in enumerate(self._term_names)}
6464
# prepare extra info to store individual termination term information
6565
self._term_dones = torch.zeros((self.num_envs, len(self._term_names)), device=self.device, dtype=torch.bool)
66+
# prepare extra info to store last episode done per termination term information
67+
self._last_episode_dones = torch.zeros_like(self._term_dones)
6668
# create buffer for managing termination per environment
6769
self._truncated_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
6870
self._terminated_buf = torch.zeros_like(self._truncated_buf)
@@ -138,7 +140,7 @@ def reset(self, env_ids: Sequence[int] | None = None) -> dict[str, torch.Tensor]
138140
env_ids = slice(None)
139141
# add to episode dict
140142
extras = {}
141-
last_episode_done_stats = self._term_dones.float().mean(dim=0)
143+
last_episode_done_stats = self._last_episode_dones.float().mean(dim=0)
142144
for i, key in enumerate(self._term_names):
143145
# store information
144146
extras["Episode_Termination/" + key] = last_episode_done_stats[i].item()
@@ -169,15 +171,17 @@ def compute(self) -> torch.Tensor:
169171
else:
170172
self._terminated_buf |= value
171173
# add to episode dones
172-
rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing
173-
if rows.numel() > 0:
174-
self._term_dones[rows] = False
175-
self._term_dones[rows, i] = True
174+
self._term_dones[:, i] = value
175+
# update last-episode dones once per compute: for any env where a term fired,
176+
# reflect exactly which term(s) fired this step and clear others
177+
rows = self._term_dones.any(dim=1).nonzero(as_tuple=True)[0]
178+
if rows.numel() > 0:
179+
self._last_episode_dones[rows] = self._term_dones[rows]
176180
# return combined termination signal
177181
return self._truncated_buf | self._terminated_buf
178182

179183
def get_term(self, name: str) -> torch.Tensor:
180-
"""Returns the termination term with the specified name.
184+
"""Returns the termination term value at current step with the specified name.
181185
182186
Args:
183187
name: The name of the termination term.
@@ -190,7 +194,8 @@ def get_term(self, name: str) -> torch.Tensor:
190194
def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequence[float]]]:
191195
"""Returns the active terms as iterable sequence of tuples.
192196
193-
The first element of the tuple is the name of the term and the second element is the raw value(s) of the term.
197+
The first element of the tuple is the name of the term and the second element is the raw value(s) of the term
198+
recorded at current step.
194199
195200
Args:
196201
env_idx: The specific environment to pull the active terms from.
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
2+
# All rights reserved.
3+
#
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
6+
"""Launch Isaac Sim Simulator first."""
7+
8+
from isaaclab.app import AppLauncher
9+
10+
# launch omniverse app
11+
simulation_app = AppLauncher(headless=True).app
12+
13+
"""Rest everything follows."""
14+
15+
import torch
16+
17+
import pytest
18+
19+
from isaaclab.managers import TerminationManager, TerminationTermCfg
20+
from isaaclab.sim import SimulationContext
21+
22+
23+
class DummyEnv:
24+
"""Minimal mutable env stub for the termination manager tests."""
25+
26+
def __init__(self, num_envs: int, device: str, sim: SimulationContext):
27+
self.num_envs = num_envs
28+
self.device = device
29+
self.sim = sim
30+
self.counter = 0 # mutable step counter used by test terms
31+
32+
33+
def fail_every_5_steps(env) -> torch.Tensor:
34+
"""Returns True for all envs when counter is a positive multiple of 5."""
35+
cond = env.counter > 0 and (env.counter % 5 == 0)
36+
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
37+
38+
39+
def fail_every_10_steps(env) -> torch.Tensor:
40+
"""Returns True for all envs when counter is a positive multiple of 10."""
41+
cond = env.counter > 0 and (env.counter % 10 == 0)
42+
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
43+
44+
45+
def fail_every_3_steps(env) -> torch.Tensor:
46+
"""Returns True for all envs when counter is a positive multiple of 3."""
47+
cond = env.counter > 0 and (env.counter % 3 == 0)
48+
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
49+
50+
51+
@pytest.fixture
52+
def env():
53+
sim = SimulationContext()
54+
return DummyEnv(num_envs=20, device="cpu", sim=sim)
55+
56+
57+
def test_initial_state_and_shapes(env):
58+
cfg = {
59+
"term_5": TerminationTermCfg(func=fail_every_5_steps),
60+
"term_10": TerminationTermCfg(func=fail_every_10_steps),
61+
}
62+
tm = TerminationManager(cfg, env)
63+
64+
# Active term names
65+
assert tm.active_terms == ["term_5", "term_10"]
66+
67+
# Internal buffers have expected shapes and start as all False
68+
assert tm._term_dones.shape == (env.num_envs, 2)
69+
assert tm._last_episode_dones.shape == (env.num_envs, 2)
70+
assert tm.dones.shape == (env.num_envs,)
71+
assert tm.time_outs.shape == (env.num_envs,)
72+
assert tm.terminated.shape == (env.num_envs,)
73+
assert torch.all(~tm._term_dones) and torch.all(~tm._last_episode_dones)
74+
75+
76+
def test_term_transitions_and_persistence(env):
77+
"""Concise transitions: single fire, persist, switch, both, persist.
78+
79+
Uses 3-step and 5-step terms and verifies current-step values and last-episode persistence.
80+
"""
81+
cfg = {
82+
"term_3": TerminationTermCfg(func=fail_every_3_steps, time_out=False),
83+
"term_5": TerminationTermCfg(func=fail_every_5_steps, time_out=False),
84+
}
85+
tm = TerminationManager(cfg, env)
86+
87+
# step 3: only term_3 -> last_episode [True, False]
88+
env.counter = 3
89+
out = tm.compute()
90+
assert torch.all(tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
91+
assert torch.all(out)
92+
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(~tm._last_episode_dones[:, 1])
93+
94+
# step 4: none -> last_episode persists [True, False]
95+
env.counter = 4
96+
out = tm.compute()
97+
assert torch.all(~out)
98+
assert torch.all(~tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
99+
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(~tm._last_episode_dones[:, 1])
100+
101+
# step 5: only term_5 -> last_episode [False, True]
102+
env.counter = 5
103+
out = tm.compute()
104+
assert torch.all(~tm.get_term("term_3")) and torch.all(tm.get_term("term_5"))
105+
assert torch.all(out)
106+
assert torch.all(~tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
107+
108+
# step 15: both -> last_episode [True, True]
109+
env.counter = 15
110+
out = tm.compute()
111+
assert torch.all(tm.get_term("term_3")) and torch.all(tm.get_term("term_5"))
112+
assert torch.all(out)
113+
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
114+
115+
# step 16: none -> persist [True, True]
116+
env.counter = 16
117+
out = tm.compute()
118+
assert torch.all(~out)
119+
assert torch.all(~tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
120+
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
121+
122+
123+
def test_time_out_vs_terminated_split(env):
124+
cfg = {
125+
"term_5": TerminationTermCfg(func=fail_every_5_steps, time_out=False), # terminated
126+
"term_10": TerminationTermCfg(func=fail_every_10_steps, time_out=True), # timeout
127+
}
128+
tm = TerminationManager(cfg, env)
129+
130+
# Step 5: terminated fires, not timeout
131+
env.counter = 5
132+
out = tm.compute()
133+
assert torch.all(out)
134+
assert torch.all(tm.terminated) and torch.all(~tm.time_outs)
135+
136+
# Step 10: both fire; timeout and terminated both True
137+
env.counter = 10
138+
out = tm.compute()
139+
assert torch.all(out)
140+
assert torch.all(tm.terminated) and torch.all(tm.time_outs)

0 commit comments

Comments
 (0)