From 531bf9a5b9d94c791a5fa142c50b32734cda2968 Mon Sep 17 00:00:00 2001
From: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
Date: Wed, 17 Sep 2025 14:45:42 +0000
Subject: [PATCH 1/4] [None][fix] update create_input_processor to handle
 custom checkpoint format

- Modify create_input_processor to accept a checkpoint_format parameter, defaulting to "HF".
- Add detailed parameter descriptions to the docstring and a return type annotation (InputProcessor).
- Attempt to load the Hugging Face model configuration only when checkpoint_format is "HF"; any other value skips the HF config load.

Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
---
 tensorrt_llm/inputs/registry.py | 32 +++++++++++++++++++++++---------
 tensorrt_llm/llmapi/llm.py      |  4 +++-
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/tensorrt_llm/inputs/registry.py b/tensorrt_llm/inputs/registry.py
index 6ac3d4ee657..1203fa5a222 100644
--- a/tensorrt_llm/inputs/registry.py
+++ b/tensorrt_llm/inputs/registry.py
@@ -448,20 +448,34 @@ def wrapper(model_cls: N) -> N:
     return wrapper
 
 
-def create_input_processor(model_path_or_dir: str, tokenizer):
-    """
-    Create an input processor for a specific model.
+def create_input_processor(
+    model_path_or_dir: str,
+    tokenizer,
+    checkpoint_format: Optional[str] = "HF",
+) -> InputProcessor:
+    """Create an input processor for a specific model.
+
+    Args:
+        model_path_or_dir: Path or repo id used to locate pretrained config/tokenizer.
+        tokenizer: Tokenizer instance.
+        checkpoint_format: Checkpoint format identifier. "HF" uses Hugging Face-style
+            config loading; any other value skips HF config loading. Default is "HF".
+
+    Returns:
+        An InputProcessor implementation (model-specific if registered; otherwise DefaultInputProcessor).
     """
     from tensorrt_llm._torch.model_config import ModelConfig
     from tensorrt_llm._torch.models import get_model_architecture
 
     model_config = None
-    try:
-        config = ModelConfig.from_pretrained(model_path_or_dir,
-                                             trust_remote_code=True)
-        model_config = config.pretrained_config
-    except (ValueError, EnvironmentError):
-        config = None
+
+    if checkpoint_format == "HF":
+        try:
+            config = ModelConfig.from_pretrained(model_path_or_dir,
+                                                 trust_remote_code=True)
+            model_config = config.pretrained_config
+        except (ValueError, EnvironmentError):
+            config = None
 
     if model_config is not None:
         try:
diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py
index c9a7aed32b3..2bbb0e2134a 100644
--- a/tensorrt_llm/llmapi/llm.py
+++ b/tensorrt_llm/llmapi/llm.py
@@ -1036,8 +1036,10 @@ def _build_model(self):
         # Multimodal special handling:
         # 1. Default load_tokenizer may fail because MM has different tokenizer configuration. Hence we initialize it inside input processor
         # 2. May need to modify model weights for MM (e.g., resize vocab embedding). We must do such operation via input processor's __init__
+        checkpoint_format = getattr(self.args, "checkpoint_format", None)
         self.input_processor = create_input_processor(self._hf_model_dir,
-                                                      self.tokenizer)
+                                                      self.tokenizer,
+                                                      checkpoint_format)
         self._tokenizer = self.input_processor.tokenizer
 
         # TODO: revisit gather_context_logits

From 0724813dd626abe51ab4c95a1670848e2ba5d3dd Mon Sep 17 00:00:00 2001
From: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
Date: Tue, 30 Sep 2025 11:25:41 +0000
Subject: [PATCH 2/4] chore: Enhance error handling in create_input_processor

- Add debug logging of the exception when loading the HF model configuration fails and the function falls back.
- Add a debug log message when the HF config load is skipped because checkpoint_format is not "HF".

Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
---
 tensorrt_llm/inputs/registry.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/inputs/registry.py b/tensorrt_llm/inputs/registry.py
index 1203fa5a222..7165b392d55 100644
--- a/tensorrt_llm/inputs/registry.py
+++ b/tensorrt_llm/inputs/registry.py
@@ -474,8 +474,14 @@ def create_input_processor(
             config = ModelConfig.from_pretrained(model_path_or_dir,
                                                  trust_remote_code=True)
             model_config = config.pretrained_config
-        except (ValueError, EnvironmentError):
+        except (ValueError, EnvironmentError) as e:
             config = None
+            logger.debug(
+                f"Unable to load HF config from {model_path_or_dir}: {e}. Falling back."
+            )
+    else:
+        logger.debug(
+            f"checkpoint_format={checkpoint_format}; skipping HF config load.")
 
     if model_config is not None:
         try:

From 81c7bc1b5b2649afd7cf801219d907cda1b70994 Mon Sep 17 00:00:00 2001
From: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
Date: Wed, 15 Oct 2025 09:54:52 +0000
Subject: [PATCH 3/4] [None][fix] Enhance MultimodalEncoder to support custom
 checkpoint loading

- Get checkpoint_format in MultimodalEncoder and pass it to create_input_processor.

Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
---
 tensorrt_llm/llmapi/mm_encoder.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/llmapi/mm_encoder.py b/tensorrt_llm/llmapi/mm_encoder.py
index af0f031fc02..8553d4678ea 100644
--- a/tensorrt_llm/llmapi/mm_encoder.py
+++ b/tensorrt_llm/llmapi/mm_encoder.py
@@ -51,8 +51,10 @@ def _build_model(self):
         # Multimodal special handling:
         # 1. Default load_tokenizer may fail because MM has different tokenizer configuration. Hence we initialize it inside input processor
         # 2. May need to modify model weights for MM (e.g., resize vocab embedding). We must do such operation via input processor's __init__
+        checkpoint_format = getattr(self.args, "checkpoint_format", None)
         self.input_processor = create_input_processor(self._hf_model_dir,
-                                                      self.tokenizer)
+                                                      self.tokenizer,
+                                                      checkpoint_format)
         self._tokenizer = self.input_processor.tokenizer
 
         assert isinstance(self.args, TorchLlmArgs)

From f288868cfb1fdb75f2e6e8aea9997bc19d0428b9 Mon Sep 17 00:00:00 2001
From: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
Date: Thu, 16 Oct 2025 10:28:48 +0000
Subject: [PATCH 4/4] [None][docs] Update checkpoint_format and
 checkpoint_loader documentation in LLMArgs

Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
---
 tensorrt_llm/llmapi/llm_args.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
index 35d02350e9c..8cb0d7e0bc7 100644
--- a/tensorrt_llm/llmapi/llm_args.py
+++ b/tensorrt_llm/llmapi/llm_args.py
@@ -2518,7 +2518,13 @@ class TorchLlmArgs(BaseLlmArgs):
                                    status="beta")
     checkpoint_loader: Optional[object] = Field(
         default=None,
-        description="The checkpoint loader to use for this LLM instance.",
+        description=
+        "The checkpoint loader to use for this LLM instance. You may use a custom checkpoint loader by subclassing "
+        "`BaseCheckpointLoader` and providing an instance of the subclass here to load weights from a custom "
+        "checkpoint format.\n"
+        "If neither checkpoint_format nor checkpoint_loader are provided, checkpoint_format will be set to HF "
+        "and the default HfCheckpointLoader will be used.\n"
+        "If checkpoint_format and checkpoint_loader are both provided, checkpoint_loader will be ignored.",
         json_schema_extra={
             "type":
             "Optional[tensorrt_llm._torch.models.checkpoints.BaseCheckpointLoader]"
@@ -2528,7 +2534,12 @@ class TorchLlmArgs(BaseLlmArgs):
 
     checkpoint_format: Optional[str] = Field(
         default=None,
-        description="The format of the provided checkpoint.",
+        description=
+        "The format of the provided checkpoint. You may use a custom checkpoint format by subclassing "
+        "`BaseCheckpointLoader` and registering it with `register_checkpoint_loader`.\n"
+        "If neither checkpoint_format nor checkpoint_loader are provided, checkpoint_format will be set to HF "
+        "and the default HfCheckpointLoader will be used.\n"
+        "If checkpoint_format and checkpoint_loader are both provided, checkpoint_loader will be ignored.",
         status="prototype",
     )