
Commit 2c2fab7

Test new Slurm script
Signed-off-by: Yanchao Lu <[email protected]>
1 parent 67125cc commit 2c2fab7

File tree

- jenkins/L0_Test.groovy
- jenkins/scripts/slurm_run.sh
- tests/integration/test_lists/test-db/l0_gb200.yml
- tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml

4 files changed: +104 −52 lines changed

jenkins/L0_Test.groovy

Lines changed: 48 additions & 21 deletions
```diff
@@ -7,7 +7,6 @@ import groovy.json.JsonOutput
 import com.nvidia.bloom.KubernetesManager
 import com.nvidia.bloom.Constants
 import com.nvidia.bloom.CloudManager
-import com.nvidia.bloom.KubernetesManager
 import com.nvidia.bloom.SlurmConfig
 import com.nvidia.bloom.SlurmCluster
 import com.nvidia.bloom.SlurmPartition
```
```diff
@@ -219,8 +218,11 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
     SlurmPartition partition = SlurmConfig.partitionConfig[platform] as SlurmPartition
     SlurmCluster cluster = SlurmConfig.clusterConfig[partition.clusterName]

-    def nodeName = "${cluster.host}-test-${UUID.randomUUID().toString()}"
-    def nodeSecret = CloudManager.createNode(nodeName)
+    // Create a unique suffix for the node name and workspace
+    String customSuffix = "${env.BUILD_TAG}-${UUID.randomUUID().toString().replaceAll("-", "").substring(0, 6)}".toLowerCase()
+    def nodeName = "${cluster.host}-test-${customSuffix}"
+    def customWorkspace = "/tmp/${nodeName}"
+    def nodeSecret = CloudManager.createNode(nodeName, customWorkspace)

     try {
         // Run ssh command to start node in desired cluster via SLURM
```
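For readers skimming the Groovy: the new suffix concatenates the Jenkins BUILD_TAG with six hex characters of a fresh UUID and lowercases the result, so each run gets a distinct node name and a matching /tmp workspace. A minimal standalone sketch of the same logic (the buildTag and host values are made-up examples):

```groovy
// Standalone sketch of the suffix logic above; buildTag and host are made-up examples.
def buildTag = "jenkins-LLM-main-123"
def customSuffix = "${buildTag}-${UUID.randomUUID().toString().replaceAll("-", "").substring(0, 6)}".toLowerCase()
def nodeName = "somehost-test-${customSuffix}"
def customWorkspace = "/tmp/${nodeName}"
println nodeName        // e.g. somehost-test-jenkins-llm-main-123-a3f9c1
println customWorkspace // e.g. /tmp/somehost-test-jenkins-llm-main-123-a3f9c1
```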
```diff
@@ -263,12 +265,30 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
     }

     if (CloudManager.isNodeOnline(nodeName)) {
-        def dockerArgs = "--gpus ${gpuCount} --cap-add=SYS_ADMIN --ipc=host --security-opt seccomp=unconfined -u root:root -v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
+        node(nodeName) {
+            sh """
+                env | sort
+                pwd && ls -alh
+                ls -alh ${env.WORKSPACE}
+                ls -alh ${env.WORKSPACE_TMP}
+            """
+        }
+
+        def dockerArgs = "--gpus ${gpuCount} " +
+            "--cap-add=SYS_ADMIN " +
+            "--ipc=host " +
+            "--security-opt seccomp=unconfined " +
+            "-u root:root " +
+            "-v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro " +
+            "-v /tmp/ccache:${CCACHE_DIR}:rw " +
+            "-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
+            "--cap-add syslog"

         if (partition.clusterName == "dlcluster") {
             dockerArgs += " -e NVIDIA_IMEX_CHANNELS=0"
         }
-        slurmRunner = runInDockerOnNodeMultiStage(LLM_DOCKER_IMAGE, nodeName, dockerArgs, false)
+
+        slurmRunner = runInDockerOnNodeMultiStage(LLM_DOCKER_IMAGE, nodeName, dockerArgs, true)
         executeLLMTestOnSlurm(pipeline, platform, testList, config, perfMode, stageName, splitId, splits, skipInstallWheel, cpver, slurmRunner)
     } else {
         echo "The node does not come online in 2 hours, terminating the job"
```
```diff
@@ -560,6 +580,13 @@ def cacheErrorAndUploadResult(stageName, taskRunner, finallyRunner, noResultIfSu
                 "${UPLOAD_PATH}/test-results/"
             )
             junit(testResults: "${stageName}/results*.xml")
+
+            // Clean up the workspace
+            sh """
+                env | sort
+                pwd && ls -alh
+                rm -rf ./*
+            """
         }
     }
 }
```
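One caveat on this cleanup pattern: `rm -rf ./*` leaves hidden entries in place, since the shell glob does not match dot-files by default. If hidden files ever needed purging too, a stricter variant (hypothetical, not part of this commit) could use find:

```groovy
// Hypothetical stricter cleanup; find also removes hidden entries, unlike the ./* glob.
sh """
    pwd && ls -alh
    find . -mindepth 1 -maxdepth 1 -exec rm -rf {} +
"""
```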
```diff
@@ -796,7 +823,7 @@ def echoNodeAndGpuInfo(pipeline, stageName)

 def runLLMDocBuild(pipeline, config)
 {
-    // Step 1: cloning tekit source code
+    // Step 1: cloning source code
     sh "pwd && ls -alh"
     sh "env | sort"
     // allow to checkout from forked repo, svc_tensorrt needs to have access to the repo, otherwise clone will fail
```
```diff
@@ -1241,13 +1268,16 @@ def rerunFailedTests(stageName, llmSrc, testCmdLine) {

 def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CONFIG, perfMode=false, stageName="Undefined", splitId=1, splits=1, skipInstallWheel=false, cpver="cp312")
 {
-    // Step 1: create LLM_ROOT dir
-    sh "pwd && ls -alh"
-    // TODO: proper way to clean workspace, maybe save in a folder named with BUILD_ID.
-    // So that it can work with multiple job running in same node
-    sh "rm -rf ./*"
+    // Step 1: create LLM_ROOT dir and clean up the workspace
     def llmRootConfig = "${LLM_ROOT}${config}"
-    sh "mkdir ${llmRootConfig}"
+    sh """
+        env | sort
+        pwd && ls -alh
+        rm -rf ./*
+        mkdir ${llmRootConfig}
+        ls -alh ${env.WORKSPACE}
+        ls -alh ${env.WORKSPACE_TMP}
+    """

     def llmPath = sh (script: "realpath ${llmRootConfig}", returnStdout: true).trim()
     def llmSrc = "${llmPath}/TensorRT-LLM/src"
```
```diff
@@ -1890,12 +1920,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     fullSet += SBSATestConfigs.keySet()

     SBSASlurmTestConfigs = [
-        "GB200-PyTorch-1": ["gb200-unrestricted", "l0_gb200", 1, 3],
-        "GB200-PyTorch-2": ["gb200-unrestricted", "l0_gb200", 2, 3],
-        "GB200-PyTorch-3": ["gb200-unrestricted", "l0_gb200", 3, 3],
-        "GB200-TensorRT-1": ["gb200-unrestricted", "l0_gb200", 1, 2],
-        "GB200-TensorRT-2": ["gb200-unrestricted", "l0_gb200", 2, 2],
-        "GB200-Triton-Post-Merge-1": ["gb200-unrestricted", "l0_gb200", 1, 1],
+        // Not used in the pipeline now
+        // "GB200-PyTorch-1": ["gb200-single", "l0_gb200", 1, 3],
         "GB200-4_GPUs-PyTorch-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
         "GB200-4_GPUs-PyTorch-Post-Merge-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
     ]
```
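For orientation, the values in these config maps read as positional tuples: Slurm partition, test-db list, split id, split count, then optional GPU and node counts (compare the six-field multi-node entries below). A hedged sketch of unpacking one entry under that assumption, which is inferred from the keys rather than documented here:

```groovy
// Hypothetical unpacking of one test-config entry; field meanings are inferred.
def entry = ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4]
def (partitionName, testDbList, splitId, splits) = entry
def gpuCount  = entry.size() > 4 ? entry[4] : 1
def nodeCount = entry.size() > 5 ? entry[5] : 1
println "partition=${partitionName} list=${testDbList} split=${splitId}/${splits} gpus=${gpuCount} nodes=${nodeCount}"
```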
```diff
@@ -1909,7 +1935,6 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-4": ["gb200-multi-node", "l0_gb200_multi_nodes", 4, 7, 8, 2],
         "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-5": ["gb200-multi-node", "l0_gb200_multi_nodes", 5, 7, 8, 2],
         "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-6": ["gb200-multi-node", "l0_gb200_multi_nodes", 6, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-7": ["gb200-multi-node", "l0_gb200_multi_nodes", 7, 7, 8, 2],
     ]
     fullSet += multiNodesSBSAConfigs.keySet()
```

```diff
@@ -2129,7 +2154,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             echo "###### Check pip install Start ######"
             withEnv(libEnv) {
                 sh "env | sort"
-                checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
+                timeout(time: 1, unit: 'HOURS') {
+                    checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
+                }
             }
             echo "###### Run LLMAPI tests Start ######"
             def config = VANILLA_CONFIG
```
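The wrapper here is the stock Jenkins timeout step: if the body exceeds the limit, Jenkins interrupts it and the stage fails instead of hanging. A minimal sketch of the pattern with a hypothetical body:

```groovy
// Stock Jenkins timeout step: aborts the enclosed body after one hour.
timeout(time: 1, unit: 'HOURS') {
    sh "pip install ./wheel/tensorrt_llm-*.whl"  // hypothetical command
}
```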
```diff
@@ -2464,7 +2491,7 @@ pipeline {

         def testPhase2StageName = env.testPhase2StageName
         if (testPhase2StageName) {
-            def dgxSigns = ["DGX_H100", "DGX_H200", "GB200-4_GPUs", "GB200-8_GPUs", "DGX_B200", "RTXPro6000-4_GPUs"]
+            def dgxSigns = ["2_GPUs", "4_GPUs", "8_GPUs"]
             singleGpuJobs = parallelJobs.findAll{!dgxSigns.any{sign -> it.key.contains(sign)}}
             dgxJobs = parallelJobs.findAll{dgxSigns.any{sign -> it.key.contains(sign)}}
         }
```

jenkins/scripts/slurm_run.sh

Lines changed: 2 additions & 1 deletion
```diff
@@ -34,7 +34,7 @@ else
     done
 fi
 testList="$testList_$splitId"
-export CPP_TEST_TIMEOUT_OVERRIDDEN=7200
+export CPP_TEST_TIMEOUT_OVERRIDDEN=$pytestTestTimeout
 export LLM_ROOT=$llmSrcNode
 export LLM_MODELS_ROOT=$MODEL_CACHE_DIR
 export UCX_TLS=^gdr_copy
```
```diff
@@ -43,6 +43,7 @@ testCmdLines=(
     "$llmSrcNode/tensorrt_llm/llmapi/trtllm-llmapi-launch"
     "pytest"
     "-v"
+    "--timeout-method=thread"
     "--timeout=$pytestTestTimeout"
     "--test-list=$testListPathNode"
     "--waives-file=$waivesListPathNode"
```

tests/integration/test_lists/test-db/l0_gb200.yml

Lines changed: 53 additions & 30 deletions
```diff
@@ -1,3 +1,5 @@
+# Don't add any tests here.
+# Copied from l0_b200.yml but not used in the pipeline now
 version: 0.0.1
 l0_gb200:
 - condition:
```
```diff
@@ -21,22 +23,10 @@ l0_gb200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=none-mtp_nextn=2-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=nvfp4-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
```
```diff
@@ -67,15 +57,21 @@ l0_gb200:
   - test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]
   - test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
   - test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
-  - unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)" TIMEOUT (120)
-  - unittest/_torch -k "modeling_llama"
+  - unittest/_torch/attention
+  - unittest/_torch/compilation
+  - unittest/_torch/debugger
+  - unittest/_torch/executor
+  - unittest/_torch/misc
+  - unittest/_torch/modules
+  - unittest/_torch/multimodal
+  - unittest/_torch/sampler
+  - unittest/_torch/speculative
+  - unittest/_torch/thop
+  - unittest/_torch/modeling -k "modeling_llama"
   - unittest/_torch/modeling -k "modeling_mixtral"
   - unittest/_torch/modeling -k "modeling_deepseek"
   - unittest/_torch/modeling -k "modeling_gpt_oss"
   - unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison"
-  - unittest/_torch/speculative/test_eagle3.py
-  - unittest/_torch/speculative/test_kv_cache_reuse.py
-  - unittest/_torch/speculative/test_dynamic_spec_decode.py
 - condition:
     ranges:
       system_gpu_count:
```
```diff
@@ -87,7 +83,7 @@ l0_gb200:
       linux_distribution_name: ubuntu*
       cpu: aarch64
     terms:
-      stage: pre_merge
+      stage: post_merge
       backend: tensorrt
   tests:
   # ------------- TRT tests ---------------
```
```diff
@@ -103,20 +99,47 @@ l0_gb200:
   - unittest/llmapi/test_llm_quant.py
   - unittest/trt/functional/test_fp4_gemm.py
 - condition:
-  ranges:
-    system_gpu_count:
-      gte: 1
-      lte: 1
-  wildcards:
-    gpu:
-    - '*gb200*'
-    linux_distribution_name: ubuntu*
-    cpu: aarch64
-  terms:
-    stage: post_merge
-    backend: triton
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*gb200*'
+      linux_distribution_name: ubuntu*
+      cpu: aarch64
+    terms:
+      stage: post_merge
+      backend: triton
   tests:
   # ------------- Triton tests ---------------
   - triton_server/test_triton.py::test_llava[llava]
   - triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]
   - triton_server/test_triton.py::test_gpt_2b_ib_lora[gpt-2b-ib-lora]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*gb200*'
+      linux_distribution_name: ubuntu*
+      cpu: aarch64
+    terms:
+      stage: post_merge
+      backend: pytorch
+  tests:
+  # ------------- PyTorch tests ---------------
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
```

tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -66,3 +66,4 @@ l0_gb200_multi_gpus:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]
+  - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] TIMEOUT (90)
```
