From 531bf9a5b9d94c791a5fa142c50b32734cda2968 Mon Sep 17 00:00:00 2001 From: Robin Kobus <19427718+Funatiq@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:45:42 +0000 Subject: [PATCH 1/4] [None][fix] update create_input_processor to handle custom checkpoint format - Modify the create_input_processor function to accept a checkpoint_format parameter, defaulting to "HF". - Add detailed parameter descriptions and return type clarification. - Load the Hugging Face model configuration only when checkpoint_format is "HF"; skip it for any other format. Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com> --- tensorrt_llm/inputs/registry.py | 32 +++++++++++++++++++++++--------- tensorrt_llm/llmapi/llm.py | 4 +++- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/tensorrt_llm/inputs/registry.py b/tensorrt_llm/inputs/registry.py index 6ac3d4ee657..1203fa5a222 100644 --- a/tensorrt_llm/inputs/registry.py +++ b/tensorrt_llm/inputs/registry.py @@ -448,20 +448,34 @@ def wrapper(model_cls: N) -> N: return wrapper -def create_input_processor(model_path_or_dir: str, tokenizer): - """ - Create an input processor for a specific model. +def create_input_processor( + model_path_or_dir: str, + tokenizer, + checkpoint_format: Optional[str] = "HF", +) -> InputProcessor: + """Create an input processor for a specific model. + + Args: + model_path_or_dir: Path or repo id used to locate pretrained config/tokenizer. + tokenizer: Tokenizer instance. + checkpoint_format: Checkpoint format identifier. "HF" uses Hugging Face-style + config loading; any other value (including None) skips HF config loading. Default is "HF". + + Returns: + An InputProcessor implementation (model-specific if registered; otherwise DefaultInputProcessor). 
""" from tensorrt_llm._torch.model_config import ModelConfig from tensorrt_llm._torch.models import get_model_architecture model_config = None - try: - config = ModelConfig.from_pretrained(model_path_or_dir, - trust_remote_code=True) - model_config = config.pretrained_config - except (ValueError, EnvironmentError): - config = None + + if checkpoint_format == "HF": + try: + config = ModelConfig.from_pretrained(model_path_or_dir, + trust_remote_code=True) + model_config = config.pretrained_config + except (ValueError, EnvironmentError): + config = None if model_config is not None: try: diff --git a/tensorrt_llm/llmapi/llm.py b/tensorrt_llm/llmapi/llm.py index c9a7aed32b3..2bbb0e2134a 100644 --- a/tensorrt_llm/llmapi/llm.py +++ b/tensorrt_llm/llmapi/llm.py @@ -1036,8 +1036,10 @@ def _build_model(self): # Multimodal special handling: # 1. Default load_tokenizer may fail because MM has different tokenizer configuration. Hence we initialize it inside input processor # 2. May need to modify model weights for MM (e.g., resize vocab embedding). We must do such operation via input processor's __init__ + checkpoint_format = getattr(self.args, "checkpoint_format", None) self.input_processor = create_input_processor(self._hf_model_dir, - self.tokenizer) + self.tokenizer, + checkpoint_format) self._tokenizer = self.input_processor.tokenizer # TODO: revisit gather_context_logits From 0724813dd626abe51ab4c95a1670848e2ba5d3dd Mon Sep 17 00:00:00 2001 From: Robin Kobus <19427718+Funatiq@users.noreply.github.com> Date: Tue, 30 Sep 2025 11:25:41 +0000 Subject: [PATCH 2/4] chore: Enhance error handling in create_input_processor - Added debug logging for exceptions when loading the HF model configuration. - Included a fallback message when skipping the HF config load based on checkpoint format. 
Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com> --- tensorrt_llm/inputs/registry.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorrt_llm/inputs/registry.py b/tensorrt_llm/inputs/registry.py index 1203fa5a222..7165b392d55 100644 --- a/tensorrt_llm/inputs/registry.py +++ b/tensorrt_llm/inputs/registry.py @@ -474,8 +474,14 @@ def create_input_processor( config = ModelConfig.from_pretrained(model_path_or_dir, trust_remote_code=True) model_config = config.pretrained_config - except (ValueError, EnvironmentError): + except (ValueError, EnvironmentError) as e: config = None + logger.debug( + f"Unable to load HF config from {model_path_or_dir}: {e}. Falling back." + ) + else: + logger.debug( + f"checkpoint_format={checkpoint_format}; skipping HF config load.") if model_config is not None: try: From 81c7bc1b5b2649afd7cf801219d907cda1b70994 Mon Sep 17 00:00:00 2001 From: Robin Kobus <19427718+Funatiq@users.noreply.github.com> Date: Wed, 15 Oct 2025 09:54:52 +0000 Subject: [PATCH 3/4] [None][fix] Enhance MultimodalEncoder to support custom checkpoint loading - Get checkpoint_format in MultimodalEncoder and pass it to create_input_processor. Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com> --- tensorrt_llm/llmapi/mm_encoder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorrt_llm/llmapi/mm_encoder.py b/tensorrt_llm/llmapi/mm_encoder.py index af0f031fc02..8553d4678ea 100644 --- a/tensorrt_llm/llmapi/mm_encoder.py +++ b/tensorrt_llm/llmapi/mm_encoder.py @@ -51,8 +51,10 @@ def _build_model(self): # Multimodal special handling: # 1. Default load_tokenizer may fail because MM has different tokenizer configuration. Hence we initialize it inside input processor # 2. May need to modify model weights for MM (e.g., resize vocab embedding). 
We must do such operation via input processor's __init__ + checkpoint_format = getattr(self.args, "checkpoint_format", None) self.input_processor = create_input_processor(self._hf_model_dir, - self.tokenizer) + self.tokenizer, + checkpoint_format) self._tokenizer = self.input_processor.tokenizer assert isinstance(self.args, TorchLlmArgs) From f288868cfb1fdb75f2e6e8aea9997bc19d0428b9 Mon Sep 17 00:00:00 2001 From: Robin Kobus <19427718+Funatiq@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:28:48 +0000 Subject: [PATCH 4/4] [None][docs] Update checkpoint_format and checkpoint_loader documentation in LLMArgs Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com> --- tensorrt_llm/llmapi/llm_args.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py index 35d02350e9c..8cb0d7e0bc7 100644 --- a/tensorrt_llm/llmapi/llm_args.py +++ b/tensorrt_llm/llmapi/llm_args.py @@ -2518,7 +2518,13 @@ class TorchLlmArgs(BaseLlmArgs): status="beta") checkpoint_loader: Optional[object] = Field( default=None, - description="The checkpoint loader to use for this LLM instance.", + description= + "The checkpoint loader to use for this LLM instance. 
You may use a custom checkpoint loader by subclassing " + "`BaseCheckpointLoader` and providing an instance of the subclass here to load weights from a custom " + "checkpoint format.\n" + "If neither checkpoint_format nor checkpoint_loader are provided, checkpoint_format will be set to HF " + "and the default HfCheckpointLoader will be used.\n" + "If checkpoint_format and checkpoint_loader are both provided, checkpoint_loader will be ignored.", json_schema_extra={ "type": "Optional[tensorrt_llm._torch.models.checkpoints.BaseCheckpointLoader]" @@ -2528,7 +2534,12 @@ class TorchLlmArgs(BaseLlmArgs): checkpoint_format: Optional[str] = Field( default=None, - description="The format of the provided checkpoint.", + description= + "The format of the provided checkpoint. You may use a custom checkpoint format by subclassing " + "`BaseCheckpointLoader` and registering it with `register_checkpoint_loader`.\n" + "If neither checkpoint_format nor checkpoint_loader are provided, checkpoint_format will be set to HF " + "and the default HfCheckpointLoader will be used.\n" + "If checkpoint_format and checkpoint_loader are both provided, checkpoint_loader will be ignored.", status="prototype", )