
Commit df7e14d

Address review comments
Signed-off-by: Michal Guzek <[email protected]>
1 parent 8b50eae commit df7e14d

6 files changed (+37 / -23 lines)


tensorrt_llm/_torch/model_config.py

Lines changed: 1 addition & 1 deletion
@@ -456,7 +456,7 @@ def get_bindings_model_config(self,
         head_dim_names = ["head_size", "head_dim"]
         head_size = None
         for head_dim_name in head_dim_names:
-            if head_dim_name in self.pretrained_config:
+            if hasattr(self.pretrained_config, head_dim_name):
                 value = getattr(self.pretrained_config, head_dim_name)
                 if value is not None:
                     head_size = value
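
The change above switches from dict-style membership to attribute lookup because the HF-style pretrained_config exposes its fields as attributes. A minimal sketch of the pattern the new code relies on (SimpleNamespace stands in for the real pretrained_config and is not part of the TensorRT-LLM API):

from types import SimpleNamespace

# Stand-in config with attribute-style access only.
pretrained_config = SimpleNamespace(head_dim=128, head_size=None)

head_size = None
for head_dim_name in ("head_size", "head_dim"):
    # hasattr() works for attribute-style configs, where a dict-style
    # `head_dim_name in pretrained_config` check may not be supported.
    if hasattr(pretrained_config, head_dim_name):
        value = getattr(pretrained_config, head_dim_name)
        if value is not None:
            head_size = value

print(head_size)  # -> 128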

tests/integration/defs/conftest.py

Lines changed: 4 additions & 6 deletions
@@ -1039,10 +1039,9 @@ def llama_model_root(request):
     elif request.param == "llama-3.2-3b":
         llama_model_root = os.path.join(models_root, "llama-3.2-models",
                                         "Llama-3.2-3B")
-    # TODO: Upload the model
-    # elif request.param == "llama-3.2-3b-instruct":
-    #     llama_model_root = os.path.join(models_root, "llama-3.2-models",
-    #                                     "Llama-3.2-3B-Instruct")
+    elif request.param == "llama-3.2-3b-instruct":
+        llama_model_root = os.path.join(models_root, "llama-3.2-models",
+                                        "Llama-3.2-3B-Instruct")
     elif request.param == "llama-3.3-70b-instruct":
         llama_model_root = os.path.join(models_root, "llama-3.3-models",
                                         "Llama-3.3-70B-Instruct")
@@ -1374,8 +1373,7 @@ def llm_mistral_model_root(request):
     if request.param == "mistral-7b-v0.1":
         model_root = os.path.join(models_root, "mistral-7b-v0.1")
     if request.param == "mistral-nemo-instruct-2407":
-        model_root = os.path.join(
-            "/code/tensorrt_llm/my_hf_models/Mistral-Nemo-Instruct-2407")
+        model_root = os.path.join(models_root, "Mistral-Nemo-Instruct-2407")
     if request.param == "komt-mistral-7b-v1":
         model_root = os.path.join(models_root, "komt-mistral-7b-v1")
     if request.param == "mistral-7b-v0.3":
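
Both hunks follow the pytest indirect-fixture pattern used throughout conftest.py: the fixture receives the parametrized string id as request.param and maps it to a path under the shared models root. A self-contained sketch of that pattern (the environment variable name and fallback path below are placeholders, not the repository's actual values):

import os

import pytest


@pytest.fixture
def llama_model_root(request):
    # The real fixture derives models_root from the test environment; this
    # default is only a placeholder for the sketch.
    models_root = os.environ.get("LLM_MODELS_ROOT", "/tmp/llm-models")
    if request.param == "llama-3.2-3b-instruct":
        return os.path.join(models_root, "llama-3.2-models",
                            "Llama-3.2-3B-Instruct")
    raise ValueError(f"unknown model id: {request.param}")


@pytest.mark.parametrize("llama_model_root", ["llama-3.2-3b-instruct"],
                         indirect=True)
def test_model_root_resolves(llama_model_root):
    assert llama_model_root.endswith("Llama-3.2-3B-Instruct")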

tests/integration/defs/examples/test_llama.py

Lines changed: 7 additions & 13 deletions
@@ -4042,19 +4042,13 @@ def test_llama_3_x_fp8_with_bf16_lora(llama_example_root, llm_datasets_root,
 
 @skip_pre_ada
 @pytest.mark.skip_less_device_memory(80000)
-@pytest.mark.parametrize(
-    "llama_model_root",
-    [
-        'llama-v2-7b-hf',
-        'llama-v3-8b-instruct-hf',
-        'llama-3.1-8b',
-        'llama-3.2-1b',
-        'llama-3.2-3b',
-        'llama-3.1-8b-instruct',
-        'llama-3.2-1b-instruct',
-        # 'llama-3.2-3b-instruct',  # TODO: Upload the model to scratch space
-    ],
-    indirect=True)
+@pytest.mark.parametrize("llama_model_root", [
+    'llama-v3-8b-instruct-hf',
+    'llama-3.1-8b-instruct',
+    'llama-3.2-1b-instruct',
+    'llama-3.2-3b-instruct',
+],
+                         indirect=True)
 def test_llama_3_x_with_bf16_lora_torch(llama_example_root, llm_datasets_root,
                                         qcache_dir_without_install_package,
                                         llm_venv, engine_dir, llama_model_root):
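
The parametrize ids become part of the collected pytest node ids, which is the form the l0_h100.yml test list further down uses to reference individual cases. A rough, self-contained sketch (the fixture body and assertion are placeholders):

import pytest


@pytest.fixture
def llama_model_root(request):
    return request.param  # placeholder; the real fixture resolves a model path


@pytest.mark.parametrize("llama_model_root", [
    'llama-3.2-1b-instruct',
    'llama-3.2-3b-instruct',
],
                         indirect=True)
def test_llama_3_x_with_bf16_lora_torch(llama_model_root):
    assert llama_model_root  # placeholder body

# pytest generates one node id per entry, e.g.
#   test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.2-1b-instruct]
# which matches the entries in the test-db list below.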

tests/integration/defs/examples/test_nemotron_nas.py

Lines changed: 5 additions & 2 deletions
@@ -155,7 +155,10 @@ def test_nemotron_nano_8b_lora_torch(nemotron_nas_example_root, llm_venv,
     )
 
 
-@pytest.mark.skip(reason="TODO: test on 4 GPUs locally")
+@pytest.mark.skip(
+    reason=
+    "TODO: The model has VGQA where different layers have different KV shapes, which breaks LoRA."
+)
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("nemotron_nas_model_root", [
@@ -193,7 +196,7 @@ def test_nemotron_super_49b_lora_torch(nemotron_nas_example_root, llm_venv,
     )
 
 
-@pytest.mark.skip(reason="TODO: test on 8 GPUs locally")
+# @pytest.mark.skip(reason="TODO: test on 8 GPUs locally")
 @pytest.mark.skip_less_device(8)
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("nemotron_nas_model_root", [
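
A small illustration of the constraint behind the new skip reason: with variable GQA, the per-layer KV-head count changes the KV projection width, so a single fixed-shape LoRA weight cannot be applied uniformly across layers. The numbers below are made up for the sketch:

head_dim = 128
per_layer_num_kv_heads = [8, 8, 4, 2]  # hypothetical VGQA schedule

# Output width of each layer's KV projection; a single LoRA B matrix of one
# fixed shape cannot match all of these at once.
kv_out_features = [n * head_dim for n in per_layer_num_kv_heads]
print(kv_out_features)  # [1024, 1024, 512, 256]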

tests/integration/defs/examples/test_phi.py

Lines changed: 2 additions & 0 deletions
@@ -450,6 +450,8 @@ def test_phi_fp8_with_bf16_lora(llm_phi_model_root,
     )
 
 
+@pytest.mark.skip(
+    reason="TODO: Resolve an import issue with transformers's LossKwargs")
 @skip_pre_ada
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("llm_phi_model_root", ['Phi-4-mini-instruct'],
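
One possible guard for the import failure named in the skip reason; the exact module path of LossKwargs differs across transformers releases, so treat this as an assumption rather than the project's actual fix:

try:
    # Present in some transformers releases; absent or relocated in others.
    from transformers.utils import LossKwargs
except ImportError:
    LossKwargs = None  # fall back when the installed transformers build lacks it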

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 18 additions & 1 deletion
@@ -90,7 +90,7 @@ l0_h100:
   - test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-enable_request_rate] # negative test
   - test_e2e.py::test_trtllm_bench_help_sanity[meta-llama/Llama-3.1-8B]
   - test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
-  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.1-8b]
+  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.2-1b-instruct]
   - examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1]
 - condition:
     ranges:
@@ -156,6 +156,23 @@ l0_h100:
   - accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_swiglu_plugin
   - examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-disable_fp8] # 4 mins
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+        lte: 8
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+  terms:
+    stage: pre_merge
+    backend: pytorch
+  tests:
+  # ------------- PyTorch tests ---------------
+  # TODO: TO REMOVE UPON SUCCESSFUL TESTING
+  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch_8_gpus
+  - examples/test_nemotron_nas.py::test_nemotron_ultra_253b_lora_torch
 - condition:
     ranges:
       system_gpu_count:
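
A rough Python illustration (not the actual test-db engine) of what the new condition block expresses: the two temporary LoRA entries are selected only on 8-GPU H100 Ubuntu machines in the pre_merge pytorch stage:

import fnmatch


def condition_matches(system_gpu_count: int, gpu_name: str, distro: str) -> bool:
    in_range = 8 <= system_gpu_count <= 8                        # ranges.system_gpu_count gte/lte
    gpu_ok = fnmatch.fnmatch(gpu_name.lower(), "*h100*")         # wildcards.gpu
    distro_ok = fnmatch.fnmatch(distro, "ubuntu*")               # wildcards.linux_distribution_name
    return in_range and gpu_ok and distro_ok


print(condition_matches(8, "NVIDIA H100 80GB HBM3", "ubuntu22.04"))  # True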
