@@ -32,8 +32,11 @@ trt_llm_release_perf_sanity_test:
 - perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20]
 - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-input_output_len:128,20]
 - perf/test_perf.py::test_perf[whisper_large_v3-bench-float16-input_output_len:128,20]
+# llama_v3.1_8b_instruct
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:128,128]

 # Test list validation
@@ -58,7 +61,10 @@ trt_llm_release_perf_sanity_test:
 # E2E gptManagerBenchmark IFB
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-cppmanager-exe-static_batching-plugin_ifb-float16-bs:8+64-input_output_len:128,128+512,32]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-cppmanager-exe-plugin_ifb-bfloat16-gwp:0.0-input_output_len:128,128+512,32]
+# llama_v3.1_8b
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:128,128]
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:512,32]
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-streaming-bfloat16-input_output_len:128,128]
@@ -77,8 +83,11 @@ trt_llm_release_perf_sanity_test:
 - '*l20*'
 - '*h20*'
 tests:
+# llama_v3.1_8b_instruct_fp8
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:fp8]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:512,32-quant:fp8]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
 - perf/test_perf.py::test_perf[llama_v3.1_nemotron_nano_8b_fp8-bench-pytorch-float8-maxbs:512-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
@@ -101,9 +110,12 @@ trt_llm_release_perf_sanity_test:
 tests:
 - perf/test_perf.py::test_perf[t5-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
 - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
+# llama_v3.1_8b_instruct
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2]
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-maxbs:256-input_output_len:128,128-gpus:2]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128-gpus:2]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-maxbs:256-input_output_len:128,128-gpus:2]
 - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:2]
 - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
@@ -128,7 +140,7 @@ trt_llm_release_perf_sanity_test:
 - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
 - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-input_output_len:128,128-quant:fp8-gpus:2]
 - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:128,128-quant:fp8-gpus:2]
-- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-quant:fp8-gpus:2]
+- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]

 # Tests for systems with 2+ GPUs and high memory
 - condition:
@@ -161,7 +173,10 @@ trt_llm_release_perf_sanity_test:
 - '*l40s*'
 - '*h20*'
 tests:
+# llama_v3.1_70b
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]
 - perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-ootb_except_mha-float16-input_output_len:128,128-gpus:4]
 - perf/test_perf.py::test_perf[starcoder_15.5b-cppmanager-exe-plugin_ifb-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]
@@ -198,9 +213,12 @@ trt_llm_release_perf_sanity_test:
 - '*l40s*'
 - '*h20*'
 tests:
+# llama_v3.1_70b
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
-- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:200,2000-reqs:10-gpus:8]
+# pytorch backend
+- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:2000,200-reqs:10-gpus:8]
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:200,2000-reqs:10-gpus:8]
 - perf/test_perf.py::test_perf[llama_v3.3_70b-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
 - perf/test_perf.py::test_perf[llama_v3.3_70b-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
@@ -222,8 +240,13 @@ trt_llm_release_perf_sanity_test:
 - '*h20*'

 tests:
+# llama_v3.1_70b
+# trt backend
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:128,128-quant:fp8-gpus:8]
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:512,32-quant:fp8-gpus:8]
+# llama_v3.3_70b_instruct_fp8
+# pytorch backend
 - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:8]

 - condition: