[Feature] Allow multiple (nested) action, reward, done keys in env, vec_env and collectors #1462

Merged: 37 commits, Aug 30, 2023
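In short, this PR lets an environment expose several (possibly nested) action, reward and done entries at once, and teaches step_mdp, the batched envs and the collectors to handle them. As a minimal sketch, a multi-reward spec now looks like the snippet below; names and shapes are copied from the MultiKeyCountingEnv mock added in this diff, not a prescribed API:

from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec

# One root "reward" plus a nested ("nested_2", "reward") entry.
reward_spec = CompositeSpec(
    reward=UnboundedContinuousTensorSpec(shape=(1,)),
    nested_2=CompositeSpec(
        reward=UnboundedContinuousTensorSpec(shape=(2, 1)),
        shape=(2,),
    ),
)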
Changes from 25 commits

Commits (37)
4f597bf  temp (matteobettini, Aug 4, 2023)
49bc8e5  action (matteobettini, Aug 4, 2023)
36e1afb  amend (matteobettini, Aug 4, 2023)
eee5045  Merge branch 'main' into allow-all-specs-compsite (matteobettini, Aug 15, 2023)
92f62a9  reward spec (matteobettini, Aug 15, 2023)
5a77edd  reward spec (matteobettini, Aug 15, 2023)
1c334b1  done spec (matteobettini, Aug 15, 2023)
7dc7548  done spec (matteobettini, Aug 15, 2023)
ba13680  fix (matteobettini, Aug 15, 2023)
2f548ea  rollout and step_mdp (matteobettini, Aug 15, 2023)
4054f61  fix (matteobettini, Aug 15, 2023)
a772289  amend (matteobettini, Aug 15, 2023)
5baa353  added todos for _reset (matteobettini, Aug 15, 2023)
b6c1047  docs (matteobettini, Aug 16, 2023)
5f294d6  fix transforms (matteobettini, Aug 16, 2023)
e20298e  vec_env (matteobettini, Aug 16, 2023)
873dbbf  collector (matteobettini, Aug 16, 2023)
4332984  treat done (matteobettini, Aug 17, 2023)
162e40f  amend (matteobettini, Aug 18, 2023)
d9c0dbb  amend (matteobettini, Aug 18, 2023)
451e9a9  collectors and vec_env (matteobettini, Aug 18, 2023)
e8e410e  TEMP (matteobettini, Aug 18, 2023)
d3cbd5d  Revert "TEMP" (matteobettini, Aug 18, 2023)
ea1fe3f  amend (matteobettini, Aug 21, 2023)
8d5abef  Merge branch 'main' into allow-all-specs-compsite (vmoens, Aug 30, 2023)
334aa8d  fix review (matteobettini, Aug 30, 2023)
4830358  Update torchrl/envs/vec_env.py (matteobettini, Aug 30, 2023)
78be054  Update torchrl/envs/vec_env.py (matteobettini, Aug 30, 2023)
836d085  Update torchrl/envs/vec_env.py (matteobettini, Aug 30, 2023)
95dd02c  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
d755d89  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
9187b28  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
19875bd  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
01fc27a  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
790ff36  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
793b738  Update torchrl/envs/common.py (matteobettini, Aug 30, 2023)
6f1debe  preappend full_ before specs (matteobettini, Aug 30, 2023)
6 changes: 3 additions & 3 deletions benchmarks/test_envs_benchmark.py
@@ -118,9 +118,9 @@ def test_step_mdp_speed(
     benchmark(
         step_mdp,
         td,
-        action_key=action_key,
-        reward_key=reward_key,
-        done_key=done_key,
+        action_keys=action_key,
+        reward_keys=reward_key,
+        done_keys=done_key,
         keep_other=keep_other,
         exclude_reward=exclude_reward,
         exclude_done=exclude_done,
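Reviewer note: the step_mdp keywords go from singular to plural here, and each now accepts a list of (possibly nested) keys. A sketch of a call with several entries per role, assuming td comes from a rollout of the MultiKeyCountingEnv mock below (key names are taken from that mock):

next_td = step_mdp(
    td,
    action_keys=[("nested_1", "action"), ("nested_2", "azione"), "action"],
    reward_keys=[("nested_1", "gift"), ("nested_2", "reward"), "reward"],
    done_keys=[("nested_1", "done"), ("nested_2", "done"), "done"],
)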
234 changes: 234 additions & 0 deletions test/mocking_classes.py
@@ -1483,3 +1483,237 @@ def _step(

    def _set_seed(self, seed: Optional[int]):
        torch.manual_seed(seed)


class MultiKeyCountingEnvPolicy:
    def __init__(
        self,
        full_action_spec: TensorSpec,
        count: bool = True,
        deterministic: bool = False,
    ):
        if not deterministic and not count:
            raise ValueError("A non-counting policy is always deterministic")

        self.full_action_spec = full_action_spec
        self.count = count
        self.deterministic = deterministic

    def __call__(self, td: TensorDictBase) -> TensorDictBase:
        action_td = self.full_action_spec.zero()
        if self.count:
            if self.deterministic:
                action_td["nested_1", "action"] += 1
                action_td["nested_2", "azione"] += 1
                action_td["action"][..., 1] = 1
            else:
                # We choose one of the three action entries at random
                choice = torch.randint(0, 3, ()).item()
                if choice == 0:
                    action_td["nested_1", "action"] += 1
                elif choice == 1:
                    action_td["nested_2", "azione"] += 1
                else:
                    action_td["action"][..., 1] = 1
        return td.update(action_td)
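A quick usage sketch for this policy (illustrative only; the spec is retrieved the same way the new collector tests below do, via the env defined just after this class):

env = MultiKeyCountingEnv(batch_size=(2,))
policy = MultiKeyCountingEnvPolicy(env.input_spec["_action_spec"])
td = policy(env.reset())  # fills the root and both nested action entries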


class MultiKeyCountingEnv(EnvBase):
    def __init__(self, max_steps: int = 5, start_val: int = 0, **kwargs):
        super().__init__(**kwargs)

        self.max_steps = max_steps
        self.start_val = start_val
        self.nested_dim_1 = 3
        self.nested_dim_2 = 2

        count = torch.zeros((*self.batch_size, 1), device=self.device, dtype=torch.int)
        count_nested_1 = torch.zeros(
            (*self.batch_size, self.nested_dim_1, 1),
            device=self.device,
            dtype=torch.int,
        )
        count_nested_2 = torch.zeros(
            (*self.batch_size, self.nested_dim_2, 1),
            device=self.device,
            dtype=torch.int,
        )

        count[:] = self.start_val
        count_nested_1[:] = self.start_val
        count_nested_2[:] = self.start_val

        self.register_buffer("count", count)
        self.register_buffer("count_nested_1", count_nested_1)
        self.register_buffer("count_nested_2", count_nested_2)

        self.make_specs()

        self.action_spec = self.unbatched_action_spec.expand(
            *self.batch_size, *self.unbatched_action_spec.shape
        )
        self.observation_spec = self.unbatched_observation_spec.expand(
            *self.batch_size, *self.unbatched_observation_spec.shape
        )
        self.reward_spec = self.unbatched_reward_spec.expand(
            *self.batch_size, *self.unbatched_reward_spec.shape
        )
        self.done_spec = self.unbatched_done_spec.expand(
            *self.batch_size, *self.unbatched_done_spec.shape
        )

    def make_specs(self):
        self.unbatched_observation_spec = CompositeSpec(
            nested_1=CompositeSpec(
                observation=BoundedTensorSpec(
                    minimum=0, maximum=200, shape=(self.nested_dim_1, 3)
                ),
                shape=(self.nested_dim_1,),
            ),
            nested_2=CompositeSpec(
                observation=UnboundedContinuousTensorSpec(shape=(self.nested_dim_2, 2)),
                shape=(self.nested_dim_2,),
            ),
            observation=UnboundedContinuousTensorSpec(shape=(10, 10, 3)),
        )

        self.unbatched_action_spec = CompositeSpec(
            nested_1=CompositeSpec(
                action=DiscreteTensorSpec(n=2, shape=(self.nested_dim_1,)),
                shape=(self.nested_dim_1,),
            ),
            nested_2=CompositeSpec(
                azione=BoundedTensorSpec(
                    minimum=0, maximum=100, shape=(self.nested_dim_2, 1)
                ),
                shape=(self.nested_dim_2,),
            ),
            action=OneHotDiscreteTensorSpec(n=2),
        )

        self.unbatched_reward_spec = CompositeSpec(
            nested_1=CompositeSpec(
                gift=UnboundedContinuousTensorSpec(shape=(self.nested_dim_1, 1)),
                shape=(self.nested_dim_1,),
            ),
            nested_2=CompositeSpec(
                reward=UnboundedContinuousTensorSpec(shape=(self.nested_dim_2, 1)),
                shape=(self.nested_dim_2,),
            ),
            reward=UnboundedContinuousTensorSpec(shape=(1,)),
        )

        self.unbatched_done_spec = CompositeSpec(
            nested_1=CompositeSpec(
                done=DiscreteTensorSpec(
                    n=2,
                    shape=(self.nested_dim_1, 1),
                    dtype=torch.bool,
                ),
                shape=(self.nested_dim_1,),
            ),
            nested_2=CompositeSpec(
                done=DiscreteTensorSpec(
                    n=2,
                    shape=(self.nested_dim_2, 1),
                    dtype=torch.bool,
                ),
                shape=(self.nested_dim_2,),
            ),
            done=DiscreteTensorSpec(
                n=2,
                shape=(1,),
                dtype=torch.bool,
            ),
        )
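Each of these is a single CompositeSpec mixing a root leaf with nested ones, so zeroing a spec yields a TensorDict with one entry per leaf; this is how _reset and _step below build their outputs. Illustration:

reward_td = env.unbatched_reward_spec.zero()
# entries: "reward", ("nested_1", "gift"), ("nested_2", "reward")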

    def _reset(
        self,
        tensordict: Optional[TensorDictBase] = None,
        **kwargs,
    ) -> TensorDictBase:
        reset_all = False
        if tensordict is not None:
            # Each done entry has a matching "_reset" mask at the same nesting
            # level; only the counters whose mask is present are reset.
            _reset = tensordict.get("_reset", None)
            if _reset is not None:
                self.count[_reset.squeeze(-1)] = self.start_val

            _reset_nested_1 = tensordict.get(("nested_1", "_reset"), None)
            if _reset_nested_1 is not None:
                self.count_nested_1[_reset_nested_1.squeeze(-1)] = self.start_val

            _reset_nested_2 = tensordict.get(("nested_2", "_reset"), None)
            if _reset_nested_2 is not None:
                self.count_nested_2[_reset_nested_2.squeeze(-1)] = self.start_val

            # No mask at any level means a full reset was requested.
            if _reset is None and _reset_nested_1 is None and _reset_nested_2 is None:
                reset_all = True

        if tensordict is None or reset_all:
            self.count[:] = self.start_val
            self.count_nested_1[:] = self.start_val
            self.count_nested_2[:] = self.start_val

        reset_td = self.observation_spec.zero()
        reset_td["observation"] += expand_right(
            self.count, reset_td["observation"].shape
        )
        reset_td["nested_1", "observation"] += expand_right(
            self.count_nested_1, reset_td["nested_1", "observation"].shape
        )
        reset_td["nested_2", "observation"] += expand_right(
            self.count_nested_2, reset_td["nested_2", "observation"].shape
        )

        reset_td.update(self.output_spec["_done_spec"].zero())

        assert reset_td.batch_size == self.batch_size

        return reset_td
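A partial-reset illustration (in practice the "_reset" masks are written by the batched envs and collectors rather than by hand):

from tensordict import TensorDict

env = MultiKeyCountingEnv(batch_size=(2,))
# Reset only the root counter of env 0; the nested counters keep their values.
env.reset(TensorDict({"_reset": torch.tensor([[True], [False]])}, batch_size=[2]))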

    def _step(
        self,
        tensordict: TensorDictBase,
    ) -> TensorDictBase:
        # Each action has a corresponding reward, done, and observation
        reward = self.output_spec["_reward_spec"].zero()
        done = self.output_spec["_done_spec"].zero()
        td = self.observation_spec.zero()

        one_hot_action = tensordict["action"].argmax(-1).unsqueeze(-1)
        reward["reward"] += one_hot_action.to(torch.float)
        self.count += one_hot_action.to(torch.int)
        td["observation"] += expand_right(self.count, td["observation"].shape)
        done["done"] = self.count > self.max_steps

        discrete_action = tensordict["nested_1"]["action"].unsqueeze(-1)
        reward["nested_1"]["gift"] += discrete_action.to(torch.float)
        self.count_nested_1 += discrete_action.to(torch.int)
        td["nested_1", "observation"] += expand_right(
            self.count_nested_1, td["nested_1", "observation"].shape
        )
        done["nested_1", "done"] = self.count_nested_1 > self.max_steps

        continuous_action = tensordict["nested_2"]["azione"]
        reward["nested_2"]["reward"] += continuous_action.to(torch.float)
        # This counter increments wherever the continuous action is nonzero.
        self.count_nested_2 += continuous_action.to(torch.bool)
        td["nested_2", "observation"] += expand_right(
            self.count_nested_2, td["nested_2", "observation"].shape
        )
        done["nested_2", "done"] = self.count_nested_2 > self.max_steps

        td.update(done)
        td.update(reward)

        assert td.batch_size == self.batch_size
        return td.select().set("next", td)

    def _set_seed(self, seed: Optional[int]):
        torch.manual_seed(seed)
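Taken together, a short rollout through this mock exercises all three action/reward/done branches at once; a sketch mirroring what TestMultiKeyEnvs checks in test_env.py:

env = MultiKeyCountingEnv(batch_size=(2,), max_steps=3)
policy = MultiKeyCountingEnvPolicy(env.input_spec["_action_spec"])
td = env.rollout(10, policy=policy)
# td carries one reward/done pair per level: "reward", ("nested_1", "gift"), ...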
80 changes: 80 additions & 0 deletions test/test_collector.py
@@ -22,10 +22,14 @@
     HeteroCountingEnv,
     HeteroCountingEnvPolicy,
     MockSerialEnv,
+    MultiKeyCountingEnv,
+    MultiKeyCountingEnvPolicy,
     NestedCountingEnv,
 )
 from tensordict.nn import TensorDictModule
 from tensordict.tensordict import assert_allclose_td, TensorDict
+
+from test_env import TestMultiKeyEnvs
 from torch import nn
 from torchrl._utils import prod, seed_generator
 from torchrl.collectors import aSyncDataCollector, SyncDataCollector
@@ -47,6 +51,7 @@
 )
 from torchrl.envs.libs.gym import _has_gym, GymEnv
 from torchrl.envs.transforms import TransformedEnv, VecNorm
+from torchrl.envs.utils import _replace_last
 from torchrl.modules import Actor, LSTMNet, OrnsteinUhlenbeckProcessWrapper, SafeModule

# torch.set_default_dtype(torch.double)
@@ -1552,6 +1557,81 @@ def test_multi_collector_het_env_consistency(
        assert_allclose_td(c2, d2)


class TestMultiKeyEnvsCollector:
    @pytest.mark.parametrize("batch_size", [(), (2,), (2, 1)])
    @pytest.mark.parametrize("frames_per_batch", [4, 8, 16])
    @pytest.mark.parametrize("max_steps", [2, 3])
    def test_collector(self, batch_size, frames_per_batch, max_steps, seed=1):
        env = MultiKeyCountingEnv(batch_size=batch_size, max_steps=max_steps)
        torch.manual_seed(seed)
        policy = MultiKeyCountingEnvPolicy(env.input_spec["_action_spec"])
        ccollector = SyncDataCollector(
            create_env_fn=env,
            policy=policy,
            frames_per_batch=frames_per_batch,
            total_frames=100,
            device="cpu",
        )

        for _td in ccollector:
            break
        ccollector.shutdown()
        for done_key in env.done_keys:
            assert _replace_last(done_key, "_reset") not in _td.keys(True, True)
        TestMultiKeyEnvs.check_rollout_consistency(_td, max_steps=max_steps)
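The loop above verifies that collectors strip the per-level "_reset" entries from the data they return. For reference, _replace_last substitutes the last element of a (possibly nested) key; a sketch of the assumed behavior:

# assumed behavior of torchrl.envs.utils._replace_last
assert _replace_last(("nested_1", "done"), "_reset") == ("nested_1", "_reset")
assert _replace_last("done", "_reset") == "_reset"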

    def test_multi_collector_consistency(
        self, seed=1, frames_per_batch=20, batch_dim=10
    ):
        env = MultiKeyCountingEnv(batch_size=(batch_dim,))
        env_fn = lambda: env
        torch.manual_seed(seed)
        policy = MultiKeyCountingEnvPolicy(
            env.input_spec["_action_spec"], deterministic=True
        )

        ccollector = MultiaSyncDataCollector(
            create_env_fn=[env_fn],
            policy=policy,
            frames_per_batch=frames_per_batch,
            total_frames=100,
            device="cpu",
        )
        for i, d in enumerate(ccollector):
            if i == 0:
                c1 = d
            elif i == 1:
                c2 = d
            else:
                break
            assert d.names[-1] == "time"
        with pytest.raises(AssertionError):
            assert_allclose_td(c1, c2)
        ccollector.shutdown()

        ccollector = MultiSyncDataCollector(
            create_env_fn=[env_fn],
            policy=policy,
            frames_per_batch=frames_per_batch,
            total_frames=100,
            device="cpu",
        )
        for i, d in enumerate(ccollector):
            if i == 0:
                d1 = d
            elif i == 1:
                d2 = d
            else:
                break
            assert d.names[-1] == "time"
        with pytest.raises(AssertionError):
            assert_allclose_td(d1, d2)
        ccollector.shutdown()

        assert_allclose_td(c1, d1)
        assert_allclose_td(c2, d2)


@pytest.mark.skipif(not torch.cuda.device_count(), reason="No casting if no cuda")
class TestUpdateParams:
    class DummyEnv(EnvBase):