
Commit e0836f9

[TRTLLM-5493] Add core infrastructure to enable loading of custom checkpoint formats (#5372)
Signed-off-by: Shahar Mor <[email protected]>
1 parent 9354114 commit e0836f9

39 files changed (+1202 −441 lines)
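The new package introduces pluggable checkpoint-loading components: a checkpoint loader that bundles a config loader, a weight loader, and a weight mapper. As a minimal, hedged usage sketch (assuming the default Hugging Face loader added in this commit can be constructed without arguments; the concrete class may also accept custom component overrides):

from tensorrt_llm._torch.models.checkpoints import HfCheckpointLoader

# Illustrative sketch only: build the default HF checkpoint loader and use the
# BaseCheckpointLoader interface to read a checkpoint directory.
loader = HfCheckpointLoader()  # no-argument construction is an assumption
model_config = loader.load_config("/path/to/hf/checkpoint")
weights = loader.load_weights("/path/to/hf/checkpoint")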

tensorrt_llm/_torch/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 from .llm import LLM
 from .model_config import MoeLoadBalancerConfig
+from .models.checkpoints.base_checkpoint_loader import BaseCheckpointLoader
 
-__all__ = ["LLM", "MoeLoadBalancerConfig"]
+__all__ = ["LLM", "MoeLoadBalancerConfig", "BaseCheckpointLoader"]
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from .base_checkpoint_loader import BaseCheckpointLoader
+from .hf.checkpoint_loader import HfCheckpointLoader
+from .hf.config_loader import HfConfigLoader
+from .hf.gemma3_weight_mapper import Gemma3HfWeightMapper
+from .hf.llama4_weight_mapper import Llama4HfWeightMapper
+from .hf.mixtral_weight_mapper import MixtralHfWeightMapper
+from .hf.nemotron_h_weight_mapper import NemotronHHfWeightMapper
+from .hf.qwen2_moe_weight_mapper import Qwen2MoeHfWeightMapper
+from .hf.qwen3_moe_weight_mapper import Qwen3MoeHfWeightMapper
+from .hf.weight_loader import HfWeightLoader
+from .hf.weight_mapper import HfWeightMapper
+
+__all__ = [
+    "HfConfigLoader", "HfWeightLoader", "HfWeightMapper",
+    "BaseCheckpointLoader", "HfCheckpointLoader", "NemotronHHfWeightMapper",
+    "Gemma3HfWeightMapper", "MixtralHfWeightMapper", "Llama4HfWeightMapper",
+    "Qwen2MoeHfWeightMapper", "Qwen3MoeHfWeightMapper"
+]
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+from typing import Optional
+
+from tensorrt_llm._torch.models.modeling_utils import MODEL_CLASS_MAPPER_MAPPING
+
+
+class AutoCheckpointMapper():
+
+    @staticmethod
+    def get(format: str, name: Optional[str] = None) -> "BaseWeightMapper":
+        if name is not None:
+            try:
+                return MODEL_CLASS_MAPPER_MAPPING[f'{name}_{format}']()
+            except KeyError:  # no mapper for this model architecture, resort to default
+                # TODO smor- a potential bug here, if the class isn't added to __init__, it will return the default mapper
+                return MODEL_CLASS_MAPPER_MAPPING[format]()
+        else:
+            return MODEL_CLASS_MAPPER_MAPPING[format]()
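For illustration, a hedged sketch of the lookup behavior above: the registry is keyed by '{architecture}_{format}' for model-specific mappers, with the bare format key as the fallback. The concrete key strings below ("HF", "LlamaForCausalLM") are assumptions for the example, not taken from this diff.

from tensorrt_llm._torch.models.checkpoints.auto_mapper import AutoCheckpointMapper

# Architecture-specific mapper first; falls back to the format-level default
# mapper if no 'LlamaForCausalLM_HF' entry is registered.
mapper = AutoCheckpointMapper.get("HF", "LlamaForCausalLM")
default_mapper = AutoCheckpointMapper.get("HF")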
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from torch import nn
+
+from tensorrt_llm._torch.model_config import ModelConfig
+from tensorrt_llm._torch.models.checkpoints.auto_mapper import \
+    AutoCheckpointMapper
+from tensorrt_llm._torch.models.checkpoints.base_config_loader import \
+    BaseConfigLoader
+from tensorrt_llm._torch.models.checkpoints.base_weight_loader import \
+    BaseWeightLoader
+from tensorrt_llm._torch.models.checkpoints.base_weight_mapper import \
+    BaseWeightMapper
+from tensorrt_llm._torch.models.modeling_utils import \
+    CHECKPOINT_LOADER_FORMAT_DEFAULT_MAPPING
+
+
+class BaseCheckpointLoader(ABC):
+
+    @abstractmethod
+    def get_default_weight_loader(self) -> BaseWeightLoader:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_default_config_loader(self) -> BaseConfigLoader:
+        raise NotImplementedError
+
+    @abstractmethod
+    def cleanup(self) -> None:
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def weight_loader(self) -> BaseWeightLoader:
+        ...
+
+    @property
+    @abstractmethod
+    def weight_mapper(self) -> BaseWeightMapper:
+        ...
+
+    @property
+    @abstractmethod
+    def config_loader(self) -> BaseConfigLoader:
+        ...
+
+    @property
+    @abstractmethod
+    def checkpoint_format(self) -> str:
+        ...
+
+    def load_config(self, checkpoint_dir: str, **kwargs) -> ModelConfig:
+        return self.config_loader.load(checkpoint_dir, **kwargs)
+
+    def load_weights(self, checkpoint_dir: str, **kwargs) -> dict[str, Any]:
+        return self.weight_loader.load_weights(checkpoint_dir, **kwargs)
+
+    @classmethod
+    def get(cls, checkpoint_format: str, **kwargs) -> "BaseCheckpointLoader":
+        try:
+            return CHECKPOINT_LOADER_FORMAT_DEFAULT_MAPPING[checkpoint_format](
+                **kwargs)
+        except KeyError:
+            raise ValueError(
+                f"Checkpoint loader for format {checkpoint_format} not found, "
+                f"available formats are: {CHECKPOINT_LOADER_FORMAT_DEFAULT_MAPPING.keys()}"
+            )
+
+    def get_initilized_weight_mapper(self, model: nn.Module,
+                                     config: ModelConfig) -> BaseWeightMapper:
+        weight_mapper = None
+        if self.weight_mapper is not None:
+            self.weight_mapper.init_model_and_config(model, config)
+            return self.weight_mapper
+        else:
+            # The name of the registered mapper should be the model architecture
+            if config.pretrained_config and config.pretrained_config.architectures:
+                model_arch = config.pretrained_config.architectures[0]
+            else:
+                raise ValueError(
+                    "Cannot determine model architecture from config")
+            weight_mapper = AutoCheckpointMapper.get(self.checkpoint_format,
+                                                     model_arch)
+            weight_mapper.init_model_and_config(model, config)
+            self.weight_mapper = weight_mapper
+            return weight_mapper
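To show how the interface above is meant to be filled in, here is a minimal sketch of a custom checkpoint loader. MyConfigLoader, MyWeightLoader, and the "MY_FORMAT" key are hypothetical placeholders, not part of this commit; a real loader would also need to be registered in CHECKPOINT_LOADER_FORMAT_DEFAULT_MAPPING so that BaseCheckpointLoader.get() can find it.

class MyCheckpointLoader(BaseCheckpointLoader):
    """Hypothetical loader for a custom on-disk checkpoint format."""

    def __init__(self):
        self._weight_loader = self.get_default_weight_loader()
        self._config_loader = self.get_default_config_loader()
        self._weight_mapper = None  # resolved lazily via get_initilized_weight_mapper

    def get_default_weight_loader(self) -> BaseWeightLoader:
        return MyWeightLoader()  # hypothetical BaseWeightLoader subclass

    def get_default_config_loader(self) -> BaseConfigLoader:
        return MyConfigLoader()  # hypothetical BaseConfigLoader subclass

    def cleanup(self) -> None:
        self._weight_loader.cleanup()
        self._config_loader.cleanup()

    @property
    def weight_loader(self) -> BaseWeightLoader:
        return self._weight_loader

    @property
    def weight_mapper(self) -> BaseWeightMapper:
        return self._weight_mapper

    @weight_mapper.setter
    def weight_mapper(self, value: BaseWeightMapper) -> None:
        # The base class assigns to this property in get_initilized_weight_mapper,
        # so concrete loaders need a setter.
        self._weight_mapper = value

    @property
    def config_loader(self) -> BaseConfigLoader:
        return self._config_loader

    @property
    def checkpoint_format(self) -> str:
        return "MY_FORMAT"  # hypothetical format key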
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+from abc import ABC, abstractmethod
+
+from tensorrt_llm._torch.model_config import ModelConfig
+
+
+class BaseConfigLoader(ABC):
+
+    @abstractmethod
+    def load(self, checkpoint_dir: str, **kwargs) -> ModelConfig:
+        pass
+
+    def cleanup(self) -> None:
+        pass
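A hedged sketch of a concrete config loader for the interface above. It assumes the pretrained config can be read with transformers' AutoConfig and wrapped in ModelConfig via its pretrained_config field; the HF loaders added in this commit may construct the config differently.

from transformers import AutoConfig

from tensorrt_llm._torch.model_config import ModelConfig
from tensorrt_llm._torch.models.checkpoints.base_config_loader import BaseConfigLoader


class MyConfigLoader(BaseConfigLoader):
    """Hypothetical config loader that wraps a Hugging Face config.json."""

    def load(self, checkpoint_dir: str, **kwargs) -> ModelConfig:
        # Assumption: ModelConfig accepts the parsed HF config as pretrained_config.
        pretrained_config = AutoConfig.from_pretrained(checkpoint_dir, **kwargs)
        return ModelConfig(pretrained_config=pretrained_config)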
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class BaseWeightLoader(ABC):
+
+    @abstractmethod
+    def load_weights(self, checkpoint_dir: str) -> dict[str, Any]:
+        """
+        Loads weights from a checkpoint directory.
+
+        Args:
+            checkpoint_dir: A path to the checkpoint directory.
+
+        Returns:
+            A dictionary where keys are tensor names and values are the tensors.
+        """
+
+    def cleanup(self) -> None:
+        pass
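As a complement to the contract documented above, a hedged sketch of a concrete weight loader that reads safetensors shards from the checkpoint directory. The class name is hypothetical; the HF weight loader added in this commit may shard, filter, or prefetch differently.

import glob
import os
from typing import Any

from safetensors.torch import load_file

from tensorrt_llm._torch.models.checkpoints.base_weight_loader import BaseWeightLoader


class MySafetensorsWeightLoader(BaseWeightLoader):
    """Hypothetical weight loader that reads every *.safetensors shard in a directory."""

    def load_weights(self, checkpoint_dir: str) -> dict[str, Any]:
        weights: dict[str, Any] = {}
        for shard in sorted(glob.glob(os.path.join(checkpoint_dir, "*.safetensors"))):
            # Each shard maps tensor names to tensors; keys are expected to be
            # disjoint across shards in a well-formed checkpoint.
            weights.update(load_file(shard))
        return weights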
Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
+from abc import ABC, abstractmethod
+from typing import Callable, List, Union
+
+from torch import nn
+
+from tensorrt_llm._torch.model_config import ModelConfig, TConfig
+from tensorrt_llm._torch.models.modeling_utils import DecoderModelForCausalLM
+
+
+class BaseWeightMapper(ABC):
+
+    def __init__(self):
+        self._callbacks: list[Callable] = []
+        self._mapping: dict = {}
+        self._skip_modules = []
+        self._model: Union[nn.Module, DecoderModelForCausalLM] | None = None
+        self._config: TConfig | None = None
+
+    def init_model_and_config(self, model: Union[nn.Module,
+                                                  DecoderModelForCausalLM],
+                              config: TConfig):
+        self._model = model
+        self._config = config
+
+        if not hasattr(model, 'model_config') or not isinstance(
+                model.model_config, ModelConfig):
+            raise ValueError("model must have a model_config attribute")
+        if not hasattr(model, 'config'):
+            raise ValueError("model must have a config attribute")
+
+        self._tp_size = 1 if model.model_config.mapping.enable_attention_dp else model.model_config.mapping.tp_size
+        self._num_kv_heads = model.config.num_key_value_heads if hasattr(
+            model.config, 'num_key_value_heads'
+        ) and model.config.num_key_value_heads is not None else model.config.num_attention_heads
+
+        self.map_weights()
+
+    def cleanup(self) -> None:
+        self._model = None
+        self._config = None
+
+    @abstractmethod
+    def map_weights(self) -> None:
+        """
+        Maps weights from TRT-LLM to a source state dictionary (e.g., Hugging Face)
+        """
+
+    @abstractmethod
+    def apply_callbacks(self, module: nn.Module, module_name: str,
+                        module_names_breakdown: list[str],
+                        weights: dict) -> list[dict]:
+        """
+        Applies a series of transformation functions to an internal representation
+        of weights or to guide the mapping process. The exact behavior might depend
+        on the implementation (e.g., storing callbacks to be applied later).
+
+        Args:
+            module: The module to apply the callbacks to
+            module_name: The specific module name (e.g., 'qkv_proj', 'gate_up_proj')
+            module_names_breakdown: List of module path components for building full paths
+            weights: The weights dictionary to process
+        """
+
+    def rename_by_params_map(self, params_map: dict[str, str],
+                             weights: dict) -> dict:
+        """
+        Rename weight keys according to regex pattern matching.
+
+        Args:
+            pattern_mapping: A dictionary mapping regex patterns to replacement strings. The key is HF name pattern, and the value is corresponding TRT-LLM name pattern.
+                The patterns will be used to match keys in the weights dict and replace
+                them according to the replacement string, which can use regex backreferences.
+                Example:
+                    HF name: vision_model.encoder.layers.1.self_attn.out_proj.{weight,bias}
+                    TRT-LLM name: vision_model.encoder.layers.1.self_attn.o_proj.{weight,bias}
+                    Then the pattern_mapping could be:
+                    pattern_mapping = {
+                        r'(.*?)out_proj(.*)': r'\1o_proj\2'
+                    }
+            weights: A dictionary of weights
+
+        Returns:
+            A dictionary of weights with renamed keys
+        """
+        import re
+
+        # Create a new dictionary to store the renamed weights
+        renamed_weights = {}
+
+        # Keep track of keys that have been matched by a pattern
+        matched_keys = set()
+
+        # Process each key in the weights dictionary
+        for key in list(weights.keys()):
+            # Check each pattern for a match
+            for pattern, replacement in params_map.items():
+                if re.match(pattern, key):
+                    # Create the new key by applying the regex replacement
+                    new_key = re.sub(pattern, replacement, key)
+                    # Store the weight with the new key
+                    renamed_weights[new_key] = weights[key]
+                    matched_keys.add(key)
+                    break
+
+            # If the key wasn't matched by any pattern, keep it as is
+            if key not in matched_keys:
+                renamed_weights[key] = weights[key]
+
+        return renamed_weights
+
+    def preprocess_weights(self, weights: dict) -> dict:
+        """
+        Preprocess weights before starting the loading process.
+        """
+        ...
+
+    def handle_manual_copy(self, module_name: str, module_weights: dict, n: str,
+                           p: nn.Parameter) -> None:
+        p.data.copy_(module_weights[n][:])
+
+    def does_require_special_handling(self, module_name: str) -> bool:
+        return module_name in self.mapping
+
+    def is_special_instance_module(self, module: nn.Module) -> bool:
+        return False
+
+    def handle_special_instance_module(self, module: nn.Module,
+                                       module_name: str,
+                                       module_weights: dict) -> None:
+        raise NotImplementedError()
+
+    @property
+    def skip_modules(self) -> List[str]:
+        return self._skip_modules
+
+    def add_skip_modules(self, value: List[str]) -> None:
+        self._skip_modules.extend(value)
+
+    def should_skip_module(self, module_name: str) -> bool:
+        return any(skip_module in module_name
+                   for skip_module in self._skip_modules)
+
+    def filter_weights(self, prefix: str, weights: dict) -> dict:
+        result = {}
+        for k, v in weights.items():
+            if k.startswith(prefix):
+                new_k = k[len(prefix) + 1:]
+                result[new_k] = v
+        return result
+
+    @property
+    def mapping(self) -> dict:
+        return self._mapping
+
+    @property
+    def config(self) -> TConfig:
+        if self._config is None:
+            raise RuntimeError("Weight mapper is not initialized")
+        return self._config
+
+    @property
+    def model(self) -> Union[nn.Module, DecoderModelForCausalLM]:
+        if self._model is None:
+            raise RuntimeError("Weight mapper is not initialized")
+        return self._model
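To make the renaming helper above concrete, a minimal sketch with a trivial mapper subclass. The no-op map_weights/apply_callbacks exist only so the abstract class can be instantiated, and the pattern is the one from the docstring example; none of this is part of the commit itself.

import torch

from tensorrt_llm._torch.models.checkpoints.base_weight_mapper import BaseWeightMapper


class TrivialWeightMapper(BaseWeightMapper):
    """Hypothetical mapper used only to demonstrate rename_by_params_map."""

    def map_weights(self) -> None:
        pass  # no TRT-LLM <-> source renames registered

    def apply_callbacks(self, module, module_name, module_names_breakdown,
                        weights) -> list[dict]:
        return []  # no per-module weight transformations


mapper = TrivialWeightMapper()
weights = {"vision_model.encoder.layers.1.self_attn.out_proj.weight": torch.zeros(2, 2)}
renamed = mapper.rename_by_params_map({r'(.*?)out_proj(.*)': r'\1o_proj\2'}, weights)
# renamed now holds the tensor under "vision_model.encoder.layers.1.self_attn.o_proj.weight"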

tensorrt_llm/_torch/models/checkpoints/hf/__init__.py

Whitespace-only changes.
