
Commit baef70e

[None][ci] move qwen3 tests from b200 to gb200 (#7257)
Signed-off-by: junq <[email protected]>
1 parent 2d0c9b3 commit baef70e

File tree

3 files changed: +3 -133 lines changed

- jenkins/L0_Test.groovy
- tests/integration/test_lists/test-db/l0_b200.yml
- tests/integration/test_lists/test-db/l0_gb200.yml

jenkins/L0_Test.groovy

Lines changed: 3 additions & 5 deletions
@@ -1820,9 +1820,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "H100_PCIe-CPP-2": ["h100-cr", "l0_h100", 2, 2],
         "H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 2],
         "H100_PCIe-TensorRT-2": ["h100-cr", "l0_h100", 2, 2],
-        "B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 3],
-        "B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 3],
-        "B200_PCIe-PyTorch-3": ["b100-ts2", "l0_b200", 3, 3],
+        "B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 2],
+        "B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
         "RTX5090-PyTorch-1": ["rtx-5090", "l0_gb202", 1, 1],
         "RTX5080-TensorRT-1": ["rtx-5080", "l0_gb203", 1, 2],
         "RTX5080-TensorRT-2": ["rtx-5080", "l0_gb203", 2, 2],
@@ -1922,8 +1921,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     fullSet += SBSATestConfigs.keySet()

     SBSASlurmTestConfigs = [
-        // Not used in the pipeline now
-        // "GB200-PyTorch-1": ["gb200-single", "l0_gb200", 1, 3],
+        "GB200-PyTorch-1": ["gb200-single", "l0_gb200", 1, 1],
         "GB200-4_GPUs-PyTorch-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
         "GB200-4_GPUs-PyTorch-Post-Merge-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
     ]
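
For context, each entry in these config maps appears to follow the pattern "StageName": [agentLabel, testList, shardIndex, shardCount(, gpuCount)]: dropping the Qwen3 tests is what lets the B200 stages shrink from three shards to two, while the GB200 stage comes back as a single shard. A minimal Groovy sketch of that assumed layout (parseTestConfig and its parameter names are illustrative, not the pipeline's actual helpers):

// Illustrative sketch only: the assumed meaning of the config tuples above.
// The real unpacking logic inside launchTestJobs() may differ.
def parseTestConfig(String stageName, List config) {
    def (agentLabel, testList, shardIndex, shardCount) = config
    def gpuCount = config.size() > 4 ? config[4] : 1  // optional fifth element
    println "Stage ${stageName}: shard ${shardIndex}/${shardCount} of ${testList} " +
            "on '${agentLabel}' nodes with ${gpuCount} GPU(s)"
}

// The single-GPU GB200 stage re-enabled by this commit:
parseTestConfig("GB200-PyTorch-1", ["gb200-single", "l0_gb200", 1, 1])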

tests/integration/test_lists/test-db/l0_b200.yml

Lines changed: 0 additions & 12 deletions
@@ -31,18 +31,6 @@ l0_b200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=none-kv_cache_reuse=False-fp8kv=False-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=nvfp4-kv_cache_reuse=True-fp8kv=False-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=nvfp4-kv_cache_reuse=True-fp8kv=True-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtllm-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtllm-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-TRITON]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency-TRTLLM]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-TRTLLM]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton]
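
The twelve entries removed above are exactly the Qwen3 tests that l0_gb200.yml now carries; with a shorter list, l0_b200 needs only two shards instead of three, matching the Groovy change. A hedged sketch of how a shardIndex/shardCount pair could partition such a list (selectShard and its round-robin policy are assumptions for illustration; the repository's real splitting logic is not shown in this diff):

// Hypothetical round-robin sharding: shard sizes stay within one test of
// each other, so a shorter list can justify a lower shardCount.
def selectShard(List<String> tests, int shardIndex, int shardCount) {
    tests.withIndex()
         .findAll { test, i -> i % shardCount == shardIndex - 1 }  // shards are 1-based
         .collect { test, i -> test }
}

def tests = ["testA", "testB", "testC", "testD", "testE"]
assert selectShard(tests, 1, 2) == ["testA", "testC", "testE"]
assert selectShard(tests, 2, 2) == ["testB", "testD"]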
tests/integration/test_lists/test-db/l0_gb200.yml

Lines changed: 0 additions & 116 deletions
@@ -1,5 +1,3 @@
-# Don't add any tests here.
-# Copied from l0_b200.yml but not used in the pipeline now
 version: 0.0.1
 l0_gb200:
 - condition:
@@ -17,23 +15,6 @@ l0_gb200:
       backend: pytorch
   tests:
   # ------------- PyTorch tests ---------------
-  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
-  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4_streaming[stream_interval_4]
-  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4_streaming[stream_interval_64]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=none-mtp_nextn=2-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=nvfp4-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=none-kv_cache_reuse=True-fp8kv=False-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=none-kv_cache_reuse=False-fp8kv=False-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=nvfp4-kv_cache_reuse=True-fp8kv=False-overlap_scheduler=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_chunked_prefill[quant_dtype=nvfp4-kv_cache_reuse=True-fp8kv=True-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
@@ -46,100 +27,3 @@ l0_gb200:
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-TRTLLM]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-CUTLASS]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton]
-  - disaggregated/test_workers.py::test_workers_kv_cache_aware_router_eviction[TinyLlama-1.1B-Chat-v1.0] # nvbugs 5300551
-  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
-  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
-  - test_e2e.py::test_ptp_quickstart_advanced_mtp[DeepSeek-V3-Lite-BF16-DeepSeek-V3-Lite/bf16]
-  - test_e2e.py::test_ptp_quickstart_advanced_mixed_precision
-  - test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]
-  - test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
-  - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
-  - unittest/_torch/attention
-  - unittest/_torch/compilation
-  - unittest/_torch/debugger
-  - unittest/_torch/executor
-  - unittest/_torch/misc
-  - unittest/_torch/modules
-  - unittest/_torch/multimodal
-  - unittest/_torch/sampler
-  - unittest/_torch/speculative
-  - unittest/_torch/thop
-  - unittest/_torch/modeling -k "modeling_llama"
-  - unittest/_torch/modeling -k "modeling_mixtral"
-  - unittest/_torch/modeling -k "modeling_deepseek"
-  - unittest/_torch/modeling -k "modeling_gpt_oss"
-  - unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison"
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 1
-        lte: 1
-    wildcards:
-      gpu:
-      - '*gb200*'
-      linux_distribution_name: ubuntu*
-      cpu: aarch64
-    terms:
-      stage: post_merge
-      backend: tensorrt
-  tests:
-  # ------------- TRT tests ---------------
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-disable_fused_quant]
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[disable_norm_quant_fusion-enable_fused_quant]
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[enable_norm_quant_fusion-disable_fused_quant]
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_nvfp4_gemm_plugin[enable_norm_quant_fusion-enable_fused_quant]
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_auto_dtype
-  - accuracy/test_cli_flow.py::TestLlama3_8BInstruct::test_fp8
-  - accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized
-  - unittest/trt/attention/test_gpt_attention.py -k "trtllm_gen"
-  - unittest/llmapi/test_llm_quant.py
-  - unittest/trt/functional/test_fp4_gemm.py
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 1
-        lte: 1
-    wildcards:
-      gpu:
-      - '*gb200*'
-      linux_distribution_name: ubuntu*
-      cpu: aarch64
-    terms:
-      stage: post_merge
-      backend: triton
-  tests:
-  # ------------- Triton tests ---------------
-  - triton_server/test_triton.py::test_llava[llava]
-  - triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]
-  - triton_server/test_triton.py::test_gpt_2b_ib_lora[gpt-2b-ib-lora]
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 1
-        lte: 1
-    wildcards:
-      gpu:
-      - '*gb200*'
-      linux_distribution_name: ubuntu*
-      cpu: aarch64
-    terms:
-      stage: post_merge
-      backend: pytorch
-  tests:
-  # ------------- PyTorch tests ---------------
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
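
The deleted condition blocks show the test-db schema in full: a ranges constraint on system_gpu_count, glob-style wildcards for gpu, linux_distribution_name, and cpu, and exact-match terms such as stage and backend. A minimal Groovy sketch of how such a block might be evaluated against a machine context (field names mirror the YAML above; the matching logic itself is an assumption, not the test-db tool's actual code):

// Hedged sketch: matching a condition block like those deleted above against
// a machine/stage context. Only the gpu wildcard is checked, for brevity.
def matchesCondition(Map condition, Map context) {
    def range = condition.ranges.system_gpu_count
    boolean inRange = context.system_gpu_count >= range.gte &&
                      context.system_gpu_count <= range.lte
    // Patterns like '*gb200*' treated as simple globs.
    boolean gpuOk = condition.wildcards.gpu.any { p ->
        context.gpu ==~ p.replace("*", ".*")
    }
    boolean termsOk = condition.terms.every { k, v -> context[k] == v }
    inRange && gpuOk && termsOk
}

def cond = [
    ranges   : [system_gpu_count: [gte: 1, lte: 1]],
    wildcards: [gpu: ["*gb200*"]],
    terms    : [stage: "post_merge", backend: "pytorch"],
]
assert matchesCondition(cond, [system_gpu_count: 1, gpu: "nvidia-gb200",
                               stage: "post_merge", backend: "pytorch"])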
