
Commit a3dbf7a

add additional info for perf test list

Signed-off-by: ruodil <[email protected]>
1 parent: 470544c

File tree: 4 files changed, +122 −44 lines

tests/integration/defs/perf/test_perf.py

Lines changed: 3 additions & 1 deletion
@@ -55,6 +55,7 @@
     "llama_v3.3_70b_instruct_fp4":
     "modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4",
     "llama_v3.3_70b_instruct": "llama-3.3-models/Llama-3.3-70B-Instruct",
+    "llama_v3.1_405b_instruct_fp8": "llama-3.1-model/Llama-3.1-405B-Instruct-FP8",
     "llama_v3.1_405b_instruct_fp4":
     "modelopt-hf-model-hub/Llama-3.1-405B-Instruct-fp4",
     "llama_v3.1_70b_instruct": "llama-3.1-model/Meta-Llama-3.1-70B-Instruct",
@@ -71,10 +72,11 @@
     "nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1-FP8",
     "llama_v4_scout_17b_16e_instruct":
     "llama4-models/Llama-4-Scout-17B-16E-Instruct",
+    "llama_v4_scout_17b_16e_instruct_fp8":
     "llama_v4_maverick_17b_128e_instruct":
     "llama4-models/Llama-4-Maverick-17B-128E-Instruct",
     "llama_v4_maverick_17b_128e_instruct_fp8":
-    "llama4-models/Llama-4-Maverick-17B-128E-Instruct-FP8",
+    "modelopt-hf-model-hub/Llama-4-Maverick-17B-128E-Instruct-FP8",
     # "llama_30b": "llama-models/llama-30b-hf",
     "mixtral_8x7b_v0.1": "Mixtral-8x7B-v0.1",
     "mixtral_8x7b_v0.1_instruct": "Mixtral-8x7B-Instruct-v0.1",

tests/integration/test_lists/qa/trt_llm_release_perf_cluster_test.yml

Lines changed: 0 additions & 2 deletions
@@ -39,8 +39,6 @@ trt_llm_release_perf_cluster_test:
         gte: 4
   tests:
   - perf/test_perf.py::test_perf[mixtral_8x22b_v0.1-bench-float16-input_output_len:512,512-quant:fp8-tp:4]
-  - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:128,128-gpus:4]
-  - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:512,32-gpus:4]
   - perf/test_perf.py::test_perf[starcoder_15b-bench-float16-input_output_len:512,200-gpus:4]
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:512-input_output_len:128,128-ep:4-tp:4-gpus:4]
   - perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-streaming-float4-maxbs:512-input_output_len:128,128-ep:4-tp:4-gpus:4]
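
Each bracketed test ID packs the full benchmark configuration into one string: the model label first, then bare flags (bench, pytorch, streaming) and key:value knobs (tp:4, gpus:4). A minimal sketch of how such an ID could be decomposed, assuming this hyphen-separated layout; the field names and parse rules are illustrative, not the actual test_perf.py parser:

# Hypothetical parser for perf test IDs like the entries above.
def parse_test_id(test_id: str) -> dict:
    parts = test_id.split("-")
    config = {"model": parts[0], "flags": [], "options": {}}
    for part in parts[1:]:
        if ":" in part:
            # key:value knob, e.g. "tp:4" or "input_output_len:128,128"
            key, value = part.split(":", 1)
            config["options"][key] = value
        else:
            # bare flag, e.g. "bench", "pytorch", "streaming"
            config["flags"].append(part)
    return config

cfg = parse_test_id(
    "deepseek_r1_nvfp4-bench-pytorch-float4-maxbs:512-input_output_len:128,128-ep:4-tp:4-gpus:4")
assert cfg["model"] == "deepseek_r1_nvfp4"
assert cfg["options"] == {"maxbs": "512", "input_output_len": "128,128",
                          "ep": "4", "tp": "4", "gpus": "4"}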

tests/integration/test_lists/qa/trt_llm_release_perf_sanity_test.yml

Lines changed: 25 additions & 2 deletions
@@ -32,8 +32,11 @@ trt_llm_release_perf_sanity_test:
   - perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20]
   - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-input_output_len:128,20]
   - perf/test_perf.py::test_perf[whisper_large_v3-bench-float16-input_output_len:128,20]
+  #llama_v3.1_8b_instruct
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:128,128]

   # Test list validation
@@ -58,7 +61,10 @@ trt_llm_release_perf_sanity_test:
   # E2E gptManagerBenchmark IFB
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-cppmanager-exe-static_batching-plugin_ifb-float16-bs:8+64-input_output_len:128,128+512,32]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-cppmanager-exe-plugin_ifb-bfloat16-gwp:0.0-input_output_len:128,128+512,32]
+  #llama_v3.1_8b
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:512,32]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-streaming-bfloat16-input_output_len:128,128]
@@ -77,8 +83,11 @@ trt_llm_release_perf_sanity_test:
       - '*l20*'
       - '*h20*'
   tests:
+  #llama_v3.1_8b_instruct_fp8
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:fp8]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32-quant:fp8]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
   - perf/test_perf.py::test_perf[llama_v3.1_nemotron_nano_8b_fp8-bench-pytorch-float8-maxbs:512-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
@@ -101,9 +110,12 @@ trt_llm_release_perf_sanity_test:
   tests:
   - perf/test_perf.py::test_perf[t5-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
   - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
+  #llama_v3.1_8b_instruct
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-maxbs:256-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128-gpus:2]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-maxbs:256-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
@@ -128,7 +140,7 @@ trt_llm_release_perf_sanity_test:
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
   - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:128,128-quant:fp8-gpus:2]
-  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-quant:fp8-gpus:2]
+  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]

   # Tests for systems with 2+ GPUs and high memory
   - condition:
@@ -161,7 +173,10 @@ trt_llm_release_perf_sanity_test:
       - '*l40s*'
       - '*h20*'
   tests:
+  #llama_v3.1_70b
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]
   - perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-ootb_except_mha-float16-input_output_len:128,128-gpus:4]
   - perf/test_perf.py::test_perf[starcoder_15.5b-cppmanager-exe-plugin_ifb-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]
@@ -198,9 +213,12 @@ trt_llm_release_perf_sanity_test:
       - '*l40s*'
       - '*h20*'
   tests:
+  #llama_v3.1_70b
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:200,2000-reqs:10-gpus:8]
+  #pytorch backend
+  - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:200,2000-reqs:10-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.3_70b-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
   - perf/test_perf.py::test_perf[llama_v3.3_70b-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
@@ -222,8 +240,13 @@ trt_llm_release_perf_sanity_test:
       - '*h20*'

   tests:
+  #llama_v3.1_70b
+  #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:128,128-quant:fp8-gpus:8]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:512,32-quant:fp8-gpus:8]
+  #llama_v3.3_70b_instruct_fp8
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:8]

   - condition:
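
The condition: blocks visible in these hunks gate each test group on the runner: gte: 4 is a lower bound on GPU count, and quoted patterns such as '*l40s*' and '*h20*' are shell-style wildcards matched against the GPU name. A minimal sketch of evaluating such a gate, under an assumed simplified schema (the real test-list loader may structure conditions differently):

from fnmatch import fnmatch

def condition_matches(condition: dict, gpu_name: str, gpu_count: int) -> bool:
    """Evaluate a simplified condition block against the current runner."""
    ranges = condition.get("ranges", {})
    if gpu_count < ranges.get("gte", 0):
        return False
    wildcards = condition.get("wildcards", [])
    # Any single pattern matching the GPU name is sufficient.
    return not wildcards or any(fnmatch(gpu_name.lower(), pat) for pat in wildcards)

gate = {"ranges": {"gte": 8}, "wildcards": ["*l40s*", "*h20*"]}
assert condition_matches(gate, "h20", 8)
assert not condition_matches(gate, "a100", 8)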
