
Commit 11da27b

add ckpt_source flag to LoRARequest, simplify tests
Signed-off-by: Venky Ganesh <[email protected]>
1 parent 8683024 commit 11da27b

File tree

6 files changed: +266 −537 lines


tensorrt_llm/executor/request.py

Lines changed: 9 additions & 0 deletions
@@ -25,10 +25,15 @@ class LoRARequest:
     lora_name: str
     lora_int_id: int
     lora_path: str = ""
+    lora_ckpt_source: str = "hf"

     def __post_init__(self):
         if self.lora_path is not None and not os.path.exists(self.lora_path):
             raise ValueError(f"lora_path ({self.lora_path}) does not exist.")
+        if self.lora_ckpt_source not in ["hf", "nemo"]:
+            raise ValueError(
+                f"lora_ckpt_source must be 'hf' or 'nemo', got '{self.lora_ckpt_source}'"
+            )

     @property
     def adapter_id(self):
@@ -42,6 +47,10 @@ def name(self):
     def path(self):
         return self.lora_path

+    @property
+    def ckpt_source(self):
+        return self.lora_ckpt_source
+

 @dataclass(slots=True)
 class PromptAdapterRequest:
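
For illustration, a minimal sketch of how the new field behaves, assuming only the dataclass definition shown in the diff above; the adapter names are made up, and the temporary directory only exists to satisfy the existing lora_path check:

```python
import tempfile

from tensorrt_llm.executor.request import LoRARequest

# lora_path must point to an existing location (checked in __post_init__),
# so a temporary directory stands in for a real adapter checkpoint here.
with tempfile.TemporaryDirectory() as adapter_dir:
    # Default checkpoint source remains Hugging Face format ("hf").
    hf_request = LoRARequest(lora_name="example-hf-adapter",
                             lora_int_id=1,
                             lora_path=adapter_dir)
    assert hf_request.ckpt_source == "hf"

    # NeMo-format checkpoints are selected through the new flag.
    nemo_request = LoRARequest(lora_name="example-nemo-adapter",
                               lora_int_id=2,
                               lora_path=adapter_dir,
                               lora_ckpt_source="nemo")
    assert nemo_request.ckpt_source == "nemo"

    # Any other value is rejected by the new validation in __post_init__.
    try:
        LoRARequest(lora_name="bad-source",
                    lora_int_id=3,
                    lora_path=adapter_dir,
                    lora_ckpt_source="pt")
    except ValueError as err:
        print(err)
```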

tensorrt_llm/executor/worker.py

Lines changed: 2 additions & 1 deletion
@@ -349,7 +349,8 @@ def _load_lora_adapter(self, lora_request: LoRARequest) -> bool:
             model_config=self._runtime_model_config if
             self._runtime_model_config is not None else self._lora_model_config,
             runtime_mapping=None,
-            uids=[adapter_id])
+            uids=[adapter_id],
+            ckpt_source=lora_request.ckpt_source)
         return adapter_id in newly_loaded_uids

     def _load_prompt_adapter(self,
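
For context, a hedged sketch of how the flag is meant to reach this worker path through the LLM API; the model and adapter paths are placeholders, and the lora_request keyword on generate() is an assumption based on the surrounding codebase rather than something shown in this diff:

```python
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.executor.request import LoRARequest

# Placeholder paths; a real base model and NeMo LoRA checkpoint are required.
llm = LLM(model="/path/to/base_model")
request = LoRARequest(lora_name="example-nemo-adapter",
                      lora_int_id=1,
                      lora_path="/path/to/nemo_lora_checkpoint",
                      lora_ckpt_source="nemo")

# When the request reaches the executor worker, _load_lora_adapter() forwards
# request.ckpt_source as ckpt_source=, so the NeMo layout is parsed correctly.
outputs = llm.generate(["Hello, world"],
                       SamplingParams(max_tokens=16),
                       lora_request=request)
print(outputs[0].outputs[0].text)
```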

tests/integration/defs/llmapi/test_llm_pytorch_nemo_lora.py

Lines changed: 0 additions & 296 deletions
This file was deleted.

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 0 additions & 2 deletions
@@ -20,7 +20,6 @@ l0_h100:
   - unittest/_torch/modeling -k "modeling_mixtral"
   - unittest/_torch/modeling -k "modeling_nemotron"
   - unittest/_torch/modeling -k "modeling_gemma3"
-  - unittest/llmapi/test_pytorch_nemo_lora.py
   - unittest/disaggregated/test_disagg_utils.py
   - unittest/disaggregated/test_router.py
   - unittest/disaggregated/test_remoteDictionary.py
@@ -189,7 +188,6 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=none-mtp_nextn=2-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
-  - llmapi/test_llm_pytorch_nemo_lora.py

 - condition:
     ranges:
