From 1b6949e44509fcc8fba64b4daf09db96de9fb357 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Tue, 18 Nov 2025 12:39:22 +0000
Subject: [PATCH 1/2] [rllib] Fix custom model-config mismatch between
 env-runner and learner

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/algorithms/algorithm_config.py          |  6 ++++++
 rllib/core/rl_module/torch/torch_rl_module.py | 14 +++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index ecf3c16b6404..4b1937ea4d10 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -4452,6 +4452,12 @@ def get_rl_module_spec(
         # If module_config_dict is not defined, set to our generic one.
         if rl_module_spec.model_config is None:
             rl_module_spec.model_config = self.model_config
+        # Otherwise we combine the two dictionaries where settings from the
+        # `RLModuleSpec` have higher priority.
+        else:
+            rl_module_spec.model_config = (
+                self.model_config | rl_module_spec._get_model_config()
+            )
 
         if inference_only is not None:
             rl_module_spec.inference_only = inference_only
diff --git a/rllib/core/rl_module/torch/torch_rl_module.py b/rllib/core/rl_module/torch/torch_rl_module.py
index b35c9a6572fa..ea5e7e852d6d 100644
--- a/rllib/core/rl_module/torch/torch_rl_module.py
+++ b/rllib/core/rl_module/torch/torch_rl_module.py
@@ -130,7 +130,19 @@ def set_state(self, state: StateDict) -> None:
         # these keys (strict=False). This is most likely due to `state` coming from
         # an `inference_only=False` RLModule, while `self` is an `inference_only=True`
         # RLModule.
-        self.load_state_dict(convert_to_torch_tensor(state), strict=False)
+        missing_keys, unexpected_keys = self.load_state_dict(
+            convert_to_torch_tensor(state), strict=False
+        )
+
+        # For inference_only modules, missing_keys should always be empty.
+        # If there are missing keys, it means the target module expects parameters
+        # that don't exist in the source, indicating an architecture mismatch.
+        if self.inference_only and missing_keys:
+            raise ValueError(
+                "Architecture mismatch detected when loading state into inference_only module! "
+                f"Missing parameters (not found in source state): {list(missing_keys)} "
+                "This usually indicates the learner and env-runner have different architectures."
+            )
 
     @OverrideToImplementCustomLogic
     @override(RLModule)

From 69d2f6debd7cfbb5afad269d495592af9d07ab33 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark@anyscale.com>
Date: Fri, 21 Nov 2025 11:42:06 +0000
Subject: [PATCH 2/2] Improve error message

Signed-off-by: Mark Towers <mark@anyscale.com>
---
 rllib/core/rl_module/torch/torch_rl_module.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rllib/core/rl_module/torch/torch_rl_module.py b/rllib/core/rl_module/torch/torch_rl_module.py
index ea5e7e852d6d..dfb5b4f9dcf4 100644
--- a/rllib/core/rl_module/torch/torch_rl_module.py
+++ b/rllib/core/rl_module/torch/torch_rl_module.py
@@ -139,9 +139,9 @@ def set_state(self, state: StateDict) -> None:
         # that don't exist in the source, indicating an architecture mismatch.
         if self.inference_only and missing_keys:
             raise ValueError(
-                "Architecture mismatch detected when loading state into inference_only module! "
-                f"Missing parameters (not found in source state): {list(missing_keys)} "
-                "This usually indicates the learner and env-runner have different architectures."
+                f"Updating the module's state is missing keys: {list(missing_keys)} "
+                "This is most likely because the state has different layer names (or are missing layers). "
+                f"Complete list of state keys is {list(state.keys())}"
             )
 
     @OverrideToImplementCustomLogic