[Feature] reset_parameters for multiagent nets #1970

Merged: 16 commits, Feb 27, 2024
52 changes: 50 additions & 2 deletions test/test_modules.py
@@ -3,6 +3,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse

from numbers import Number

import numpy as np
@@ -947,6 +948,31 @@ def test_multiagent_mlp_lazy(self):
if isinstance(p, torch.nn.parameter.UninitializedParameter):
raise AssertionError("UninitializedParameter found")

@pytest.mark.parametrize("n_agents", [1, 3])
@pytest.mark.parametrize("share_params", [True, False])
@pytest.mark.parametrize("centralised", [True, False])
def test_reset_mlp(
self,
n_agents,
centralised,
share_params,
):
actor_net = MultiAgentMLP(
n_agent_inputs=4,
n_agent_outputs=6,
num_cells=[5, 5],
n_agents=n_agents,
centralised=centralised,
share_params=share_params,
)
params_before = actor_net.params.clone()
actor_net.reset_parameters()
params_after = actor_net.params
for p1, p2 in zip(
params_before.values(True, True), params_after.values(True, True)
):
assert not torch.isclose(p1, p2).all()

@pytest.mark.parametrize("n_agents", [1, 3])
@pytest.mark.parametrize("share_params", [True, False])
@pytest.mark.parametrize("centralised", [True, False])
@@ -1051,6 +1077,30 @@ def test_multiagent_cnn_lazy(self):
if isinstance(p, torch.nn.parameter.UninitializedParameter):
raise AssertionError("UninitializedParameter found")

@pytest.mark.parametrize("n_agents", [1, 3])
@pytest.mark.parametrize("share_params", [True, False])
@pytest.mark.parametrize("centralised", [True, False])
def test_reset_cnn(
self,
n_agents,
centralised,
share_params,
):
actor_net = MultiAgentConvNet(
in_features=4,
num_cells=[5, 5],
n_agents=n_agents,
centralised=centralised,
share_params=share_params,
)
params_before = actor_net.params.clone()
actor_net.reset_parameters()
params_after = actor_net.params
for p1, p2 in zip(
params_before.values(True, True), params_after.values(True, True)
):
assert not torch.isclose(p1, p2).all()

@pytest.mark.parametrize("n_agents", [1, 3])
@pytest.mark.parametrize(
"batch",
@@ -1271,7 +1321,6 @@ def test_onlinedtactor(self, batch_dims, T=5):
@pytest.mark.parametrize("device", get_default_devices())
@pytest.mark.parametrize("bias", [True, False])
def test_python_lstm_cell(device, bias):

lstm_cell1 = LSTMCell(10, 20, device=device, bias=bias)
lstm_cell2 = nn.LSTMCell(10, 20, device=device, bias=bias)

@@ -1307,7 +1356,6 @@ def test_python_lstm_cell(device, bias):
@pytest.mark.parametrize("device", get_default_devices())
@pytest.mark.parametrize("bias", [True, False])
def test_python_gru_cell(device, bias):

gru_cell1 = GRUCell(10, 20, device=device, bias=bias)
gru_cell2 = nn.GRUCell(10, 20, device=device, bias=bias)

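Beyond checking that a reset changes the parameters, the point raised in the review thread below is that with `share_params=False` each agent should end up with its own freshly drawn weights (this is what the hard-coded `randomness="different"` inside `reset_parameters` provides). A minimal sketch of that extra check, reusing the constructor arguments from the tests above; treating dim 0 of each leaf of `net.params` as the per-agent stack dimension is an assumption based on how the module vmaps over its parameters, not something asserted by the diff:

    import torch
    from torchrl.modules import MultiAgentMLP

    net = MultiAgentMLP(
        n_agent_inputs=4,
        n_agent_outputs=6,
        num_cells=[5, 5],
        n_agents=3,
        centralised=False,
        share_params=False,
    )
    net.reset_parameters()
    # With unshared parameters, each leaf of `net.params` stacks the per-agent
    # weights along dim 0; after a reset with randomness="different" the
    # per-agent slices should not coincide.
    for leaf in net.params.values(True, True):
        assert not torch.isclose(leaf[0], leaf[1]).all()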
30 changes: 25 additions & 5 deletions torchrl/modules/models/multiagent.py
@@ -16,6 +16,7 @@
from torchrl.data.utils import DEVICE_TYPING

from torchrl.modules.models import ConvNet, MLP
from torchrl.modules.models.utils import _reset_parameters_recursive


class MultiAgentNetBase(nn.Module):
@@ -30,6 +31,7 @@ def __init__(
centralised: bool,
share_params: bool,
agent_dim: int,
vmap_randomness: str = "different",
**kwargs,
):
super().__init__()
@@ -38,6 +40,7 @@ def __init__(
self.share_params = share_params
self.centralised = centralised
self.agent_dim = agent_dim
self._vmap_randomness = vmap_randomness

agent_networks = [
self._build_single_net(**kwargs)
@@ -54,9 +57,11 @@ def __init__(
self.__dict__["_empty_net"] = self._build_single_net(**kwargs)

@property
def _vmap_randomness(self):
def vmap_randomness(self):
if self.initialized:
return "error"
return self._vmap_randomness
# Matteo: There seems to be a problem with lazy layers when using "different" here
# found this bit as legacy, not sure the reason
return "same"

def _make_params(self, agent_networks):
@@ -92,14 +97,14 @@ def forward(self, *inputs: Tuple[torch.Tensor]) -> torch.Tensor:
if not self.share_params:
if self.centralised:
output = self.vmap_func_module(
self._empty_net, (0, None), (-2,), randomness=self._vmap_randomness
self._empty_net, (0, None), (-2,), randomness=self.vmap_randomness
)(self.params, inputs)
else:
output = self.vmap_func_module(
self._empty_net,
(0, self.agent_dim),
(-2,),
randomness=self._vmap_randomness,
randomness=self.vmap_randomness,
)(self.params, inputs)

# If parameters are shared, agents use the same network
@@ -125,6 +130,21 @@ def forward(self, *inputs: Tuple[torch.Tensor]) -> torch.Tensor:

return output

def reset_parameters(self):
def vmap_reset_module(module, *args, **kwargs):
def reset_module(params):
with params.to_module(module):
_reset_parameters_recursive(module)
return params

return torch.vmap(reset_module, *args, **kwargs)

if not self.share_params:
vmap_reset_module(self._empty_net, randomness="different")(self.params)
@matteobettini (Contributor Author) commented on Feb 27, 2024:

I would like to discuss the vmap randomness of this class

    @property
    def _vmap_randomness(self):
        if self.initialized:
            return "error"
        return "same"

Why would this be a class property and why are we having those values?

For me this should be

    @property
    def _vmap_randomness(self):
        return "different"

@matteobettini (Contributor Author):

In this case it needs to be "different" to have different reset values for each agent.

But also in the forward pass I feel like it should be "different"

Contributor:

IMO it should be

    @property
    def _vmap_randomness(self):
        if self.initialized:
            return self.vmap_randomness
        return "different"

and users are in charge of telling the module what randomness they want.

@matteobettini (Contributor Author):

Could you explain this a bit? Why do we have a switch on initialization?

Contributor:

For init you need "different" because you must have different weights for each net.
But in other settings you can't tell, and the best is to let the user choose.
They may as well want the same random number for each element of the batch

@matteobettini (Contributor Author):

but what I do not understand is why before we had

    @property
    def _vmap_randomness(self):
        if self.initialized:
            return "error"
        return "same"

@matteobettini (Contributor Author):

if I try to change this to

    @property
    def _vmap_randomness(self):
        if self.initialized:
            return self.vmap_randomness
        return "different"

the lazy layers will crash

Contributor:

> the lazy layers will crash

this is a statement that is hard to reproduce, can you share more?

For instance this works fine on my end:

    from torchrl.modules import MLP
    from tensordict import TensorDict
    import torch
    from functorch import dim

    d0 = dim.dims(1)
    modules = [torch.nn.Linear(2, 3) for _ in range(3)]

    td = TensorDict.from_modules(*modules, as_module=True)

    def reset(td):
        with td.to_module(modules[0]):
            modules[0].reset_parameters()
        return td

    td = torch.vmap(reset, randomness="same")(td)
    print(td["weight"])
    td = torch.vmap(reset, randomness="different")(td)
    print(td["weight"])

the first produces a stack of identical tensors, the second different

else:
with self.params.to_module(self._empty_net):
_reset_parameters_recursive(self._empty_net)


class MultiAgentMLP(MultiAgentNetBase):
"""Mult-agent MLP.
@@ -262,7 +282,6 @@ def __init__(
activation_class: Optional[Type[nn.Module]] = nn.Tanh,
**kwargs,
):

self.n_agents = n_agents
self.n_agent_inputs = n_agent_inputs
self.n_agent_outputs = n_agent_outputs
@@ -477,6 +496,7 @@ def __init__(
share_params=share_params,
device=device,
agent_dim=-4,
**kwargs,
)

def _build_single_net(self, *, device, **kwargs):
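Taken together, the changes to this file expose the vmap randomness as a constructor argument and add an in-place `reset_parameters()`. A construction sketch follows, with the caveats that `vmap_randomness` reaching `MultiAgentNetBase` relies on the `**kwargs` forwarding shown in the last hunk, and that the input layout `(*batch, n_agents, channels, H, W)` is inferred from `agent_dim=-4` rather than stated in the diff:

    import torch
    from torchrl.modules import MultiAgentConvNet

    net = MultiAgentConvNet(
        in_features=4,
        num_cells=[5, 5],
        n_agents=3,
        centralised=False,
        share_params=False,
        # Default is "different"; "same" would replay identical random ops
        # (e.g. dropout masks) across the vmapped agent networks in forward().
        vmap_randomness="different",
    )
    out = net(torch.randn(8, 3, 4, 32, 32))  # (*batch, n_agents, channels, H, W)
    # Re-draw an independent set of weights for every agent network in-place.
    net.reset_parameters()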
25 changes: 25 additions & 0 deletions torchrl/modules/models/utils.py
@@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.

import inspect
import warnings
from typing import Optional, Sequence, Type

import torch
@@ -123,3 +124,27 @@ def create_on_device(
else:
return module_class(*args, **kwargs).to(device)
# .to() is always available for nn.Module, and does nothing if the Module contains no parameters or buffers


def _reset_parameters_recursive(module, warn_if_no_op: bool = True) -> bool:
"""Recursively resets the parameters of a :class:`~torch.nn.Module` in-place.

Args:
module (torch.nn.Module): the module to reset.
warn_if_no_op (bool, optional): whether to raise a warning in case this is a no-op.
Defaults to ``True``.

Returns: whether any parameter has been reset.

"""
any_reset = False
for layer in module.children():
if hasattr(layer, "reset_parameters"):
layer.reset_parameters()
any_reset |= True
any_reset |= _reset_parameters_recursive(layer, warn_if_no_op=False)
if warn_if_no_op and not any_reset:
warnings.warn(
"_reset_parameters_recursive was called without the parameters argument and did not find any parameters to reset"
)
return any_reset
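To make the helper's contract concrete, here is a small illustrative sketch (the `Doubler` module is made up for the example; only `_reset_parameters_recursive` and its import path come from this diff):

    from torch import nn
    from torchrl.modules.models.utils import _reset_parameters_recursive

    # The outer Sequential has no reset_parameters of its own, but the recursion
    # reaches the nested Linear layers, resets them and returns True.
    net = nn.Sequential(nn.Sequential(nn.Linear(3, 16), nn.Tanh()), nn.Linear(16, 1))
    assert _reset_parameters_recursive(net)

    # No resettable child anywhere: returns False and emits the warning
    # (pass warn_if_no_op=False to silence it).
    class Doubler(nn.Module):
        def forward(self, x):
            return 2 * x

    assert not _reset_parameters_recursive(Doubler())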