
Commit df7e14d

Address review comments
Signed-off-by: Michal Guzek <[email protected]>
1 parent 8b50eae commit df7e14d

6 files changed (+37 / -23 lines)


tensorrt_llm/_torch/model_config.py

Lines changed: 1 addition & 1 deletion
@@ -456,7 +456,7 @@ def get_bindings_model_config(self,
         head_dim_names = ["head_size", "head_dim"]
         head_size = None
         for head_dim_name in head_dim_names:
-            if head_dim_name in self.pretrained_config:
+            if hasattr(self.pretrained_config, head_dim_name):
                 value = getattr(self.pretrained_config, head_dim_name)
                 if value is not None:
                     head_size = value
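
The change above switches from dict-style membership to attribute lookup because the HF-style pretrained_config exposes its fields as attributes. A minimal sketch of the pattern the new code relies on (SimpleNamespace stands in for the real pretrained_config and is not part of the TensorRT-LLM API):

from types import SimpleNamespace

# Stand-in config with attribute-style access only.
pretrained_config = SimpleNamespace(head_dim=128, head_size=None)

head_size = None
for head_dim_name in ("head_size", "head_dim"):
    # hasattr() works for attribute-style configs, where a dict-style
    # `head_dim_name in pretrained_config` check may not be supported.
    if hasattr(pretrained_config, head_dim_name):
        value = getattr(pretrained_config, head_dim_name)
        if value is not None:
            head_size = value

print(head_size)  # -> 128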

tests/integration/defs/conftest.py

Lines changed: 4 additions & 6 deletions
@@ -1039,10 +1039,9 @@ def llama_model_root(request):
     elif request.param == "llama-3.2-3b":
         llama_model_root = os.path.join(models_root, "llama-3.2-models",
                                         "Llama-3.2-3B")
-    # TODO: Upload the model
-    # elif request.param == "llama-3.2-3b-instruct":
-    #     llama_model_root = os.path.join(models_root, "llama-3.2-models",
-    #                                     "Llama-3.2-3B-Instruct")
+    elif request.param == "llama-3.2-3b-instruct":
+        llama_model_root = os.path.join(models_root, "llama-3.2-models",
+                                        "Llama-3.2-3B-Instruct")
     elif request.param == "llama-3.3-70b-instruct":
         llama_model_root = os.path.join(models_root, "llama-3.3-models",
                                         "Llama-3.3-70B-Instruct")
@@ -1374,8 +1373,7 @@ def llm_mistral_model_root(request):
     if request.param == "mistral-7b-v0.1":
         model_root = os.path.join(models_root, "mistral-7b-v0.1")
     if request.param == "mistral-nemo-instruct-2407":
-        model_root = os.path.join(
-            "/code/tensorrt_llm/my_hf_models/Mistral-Nemo-Instruct-2407")
+        model_root = os.path.join(models_root, "Mistral-Nemo-Instruct-2407")
     if request.param == "komt-mistral-7b-v1":
         model_root = os.path.join(models_root, "komt-mistral-7b-v1")
     if request.param == "mistral-7b-v0.3":
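
Both hunks follow the pytest indirect-fixture pattern used throughout conftest.py: the fixture receives the parametrized string id as request.param and maps it to a path under the shared models root. A self-contained sketch of that pattern (the environment variable name and fallback path below are placeholders, not the repository's actual values):

import os

import pytest


@pytest.fixture
def llama_model_root(request):
    # The real fixture derives models_root from the test environment; this
    # default is only a placeholder for the sketch.
    models_root = os.environ.get("LLM_MODELS_ROOT", "/tmp/llm-models")
    if request.param == "llama-3.2-3b-instruct":
        return os.path.join(models_root, "llama-3.2-models",
                            "Llama-3.2-3B-Instruct")
    raise ValueError(f"unknown model id: {request.param}")


@pytest.mark.parametrize("llama_model_root", ["llama-3.2-3b-instruct"],
                         indirect=True)
def test_model_root_resolves(llama_model_root):
    assert llama_model_root.endswith("Llama-3.2-3B-Instruct")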

tests/integration/defs/examples/test_llama.py

Lines changed: 7 additions & 13 deletions
@@ -4042,19 +4042,13 @@ def test_llama_3_x_fp8_with_bf16_lora(llama_example_root, llm_datasets_root,
 
 @skip_pre_ada
 @pytest.mark.skip_less_device_memory(80000)
-@pytest.mark.parametrize(
-    "llama_model_root",
-    [
-        'llama-v2-7b-hf',
-        'llama-v3-8b-instruct-hf',
-        'llama-3.1-8b',
-        'llama-3.2-1b',
-        'llama-3.2-3b',
-        'llama-3.1-8b-instruct',
-        'llama-3.2-1b-instruct',
-        # 'llama-3.2-3b-instruct',  # TODO: Upload the model to scratch space
-    ],
-    indirect=True)
+@pytest.mark.parametrize("llama_model_root", [
+    'llama-v3-8b-instruct-hf',
+    'llama-3.1-8b-instruct',
+    'llama-3.2-1b-instruct',
+    'llama-3.2-3b-instruct',
+],
+                         indirect=True)
 def test_llama_3_x_with_bf16_lora_torch(llama_example_root, llm_datasets_root,
                                         qcache_dir_without_install_package,
                                         llm_venv, engine_dir, llama_model_root):
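
The parametrize ids become part of the collected pytest node ids, which is the form the l0_h100.yml test list further down uses to reference individual cases. A rough, self-contained sketch (the fixture body and assertion are placeholders):

import pytest


@pytest.fixture
def llama_model_root(request):
    return request.param  # placeholder; the real fixture resolves a model path


@pytest.mark.parametrize("llama_model_root", [
    'llama-3.2-1b-instruct',
    'llama-3.2-3b-instruct',
],
                         indirect=True)
def test_llama_3_x_with_bf16_lora_torch(llama_model_root):
    assert llama_model_root  # placeholder body

# pytest generates one node id per entry, e.g.
#   test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.2-1b-instruct]
# which matches the entries in the test-db list below.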

tests/integration/defs/examples/test_nemotron_nas.py

Lines changed: 5 additions & 2 deletions
@@ -155,7 +155,10 @@ def test_nemotron_nano_8b_lora_torch(nemotron_nas_example_root, llm_venv,
     )
 
 
-@pytest.mark.skip(reason="TODO: test on 4 GPUs locally")
+@pytest.mark.skip(
+    reason=
+    "TODO: The model has VGQA where different layers have different KV shapes, which breaks LoRA."
+)
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("nemotron_nas_model_root", [
@@ -193,7 +196,7 @@ def test_nemotron_super_49b_lora_torch(nemotron_nas_example_root, llm_venv,
     )
 
 
-@pytest.mark.skip(reason="TODO: test on 8 GPUs locally")
+# @pytest.mark.skip(reason="TODO: test on 8 GPUs locally")
 @pytest.mark.skip_less_device(8)
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("nemotron_nas_model_root", [
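
A small illustration of the constraint behind the new skip reason: with variable GQA, the per-layer KV-head count changes the KV projection width, so a single fixed-shape LoRA weight cannot be applied uniformly across layers. The numbers below are made up for the sketch:

head_dim = 128
per_layer_num_kv_heads = [8, 8, 4, 2]  # hypothetical VGQA schedule

# Output width of each layer's KV projection; a single LoRA B matrix of one
# fixed shape cannot match all of these at once.
kv_out_features = [n * head_dim for n in per_layer_num_kv_heads]
print(kv_out_features)  # [1024, 1024, 512, 256]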

tests/integration/defs/examples/test_phi.py

Lines changed: 2 additions & 0 deletions
@@ -450,6 +450,8 @@ def test_phi_fp8_with_bf16_lora(llm_phi_model_root,
     )
 
 
+@pytest.mark.skip(
+    reason="TODO: Resolve an import issue with transformers's LossKwargs")
 @skip_pre_ada
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("llm_phi_model_root", ['Phi-4-mini-instruct'],
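
One possible guard for the import failure named in the skip reason; the exact module path of LossKwargs differs across transformers releases, so treat this as an assumption rather than the project's actual fix:

try:
    # Present in some transformers releases; absent or relocated in others.
    from transformers.utils import LossKwargs
except ImportError:
    LossKwargs = None  # fall back when the installed transformers build lacks it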

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 18 additions & 1 deletion
@@ -90,7 +90,7 @@ l0_h100:
   - test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-enable_request_rate] # negative test
   - test_e2e.py::test_trtllm_bench_help_sanity[meta-llama/Llama-3.1-8B]
   - test_e2e.py::test_ptp_quickstart_multimodal[gemma-3-27b-it-gemma/gemma-3-27b-it-image-True]
-  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.1-8b]
+  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.2-1b-instruct]
   - examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1]
 - condition:
     ranges:
@@ -156,6 +156,23 @@ l0_h100:
   - accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8_gemm_swiglu_plugin
   - examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-disable_fp8] # 4 mins
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+        lte: 8
+    wildcards:
+      gpu:
+      - '*h100*'
+      linux_distribution_name: ubuntu*
+  terms:
+    stage: pre_merge
+    backend: pytorch
+  tests:
+  # ------------- PyTorch tests ---------------
+  # TODO: TO REMOVE UPON SUCCESSFUL TESTING
+  - examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch_8_gpus
+  - examples/test_nemotron_nas.py::test_nemotron_ultra_253b_lora_torch
 - condition:
     ranges:
       system_gpu_count:
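
A rough Python illustration (not the actual test-db engine) of what the new condition block expresses: the two temporary LoRA entries are selected only on 8-GPU H100 Ubuntu machines in the pre_merge pytorch stage:

import fnmatch


def condition_matches(system_gpu_count: int, gpu_name: str, distro: str) -> bool:
    in_range = 8 <= system_gpu_count <= 8                        # ranges.system_gpu_count gte/lte
    gpu_ok = fnmatch.fnmatch(gpu_name.lower(), "*h100*")         # wildcards.gpu
    distro_ok = fnmatch.fnmatch(distro, "ubuntu*")               # wildcards.linux_distribution_name
    return in_range and gpu_ok and distro_ok


print(condition_matches(8, "NVIDIA H100 80GB HBM3", "ubuntu22.04"))  # True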
