@@ -46,24 +46,18 @@ steps:
4646 mirror_hardwares : [amdexperimental]
4747 source_file_dependencies :
4848 - vllm/
49- - tests/mq_llm_engine
50- - tests/async_engine
5149 - tests/test_inputs.py
5250 - tests/test_outputs.py
5351 - tests/multimodal
5452 - tests/utils_
55- - tests/worker
5653 - tests/standalone_tests/lazy_imports.py
5754 - tests/transformers_utils
5855 commands :
5956 - python3 standalone_tests/lazy_imports.py
60- - pytest -v -s mq_llm_engine # MQLLMEngine
61- - pytest -v -s async_engine # AsyncLLMEngine
6257 - pytest -v -s test_inputs.py
6358 - pytest -v -s test_outputs.py
6459 - pytest -v -s multimodal
6560 - pytest -v -s utils_ # Utils
66- - pytest -v -s worker # Worker
6761 - pytest -v -s transformers_utils # transformers_utils
6862
6963- label : Python-only Installation Test # 10min
@@ -84,25 +78,12 @@ steps:
8478 - vllm/
8579 - tests/basic_correctness/test_basic_correctness
8680 - tests/basic_correctness/test_cpu_offload
87- - tests/basic_correctness/test_preemption
8881 - tests/basic_correctness/test_cumem.py
8982 commands :
9083 - export VLLM_WORKER_MULTIPROC_METHOD=spawn
9184 - pytest -v -s basic_correctness/test_cumem.py
9285 - pytest -v -s basic_correctness/test_basic_correctness.py
9386 - pytest -v -s basic_correctness/test_cpu_offload.py
94- - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
95-
96- - label : Core Test # 22min
97- timeout_in_minutes : 35
98- mirror_hardwares : [amdexperimental]
99- fast_check : true
100- source_file_dependencies :
101- - vllm/core
102- - vllm/distributed
103- - tests/core
104- commands :
105- - pytest -v -s core
10687
10788- label : Entrypoints Unit Tests # 5min
10889 timeout_in_minutes : 10
@@ -230,16 +211,14 @@ steps:
230211 num_gpus : 2
231212 source_file_dependencies :
232213 - vllm/
233- - tests/metrics
234214 - tests/v1/tracing
235215 commands :
236- - pytest -v -s metrics
237216 - " pip install \
238217 'opentelemetry-sdk>=1.26.0' \
239218 'opentelemetry-api>=1.26.0' \
240219 'opentelemetry-exporter-otlp>=1.26.0' \
241220 'opentelemetry-semantic-conventions-ai>=0.4.1'"
242- - pytest -v -s tracing
221+ - pytest -v -s v1/tracing
243222
244223# #### fast check tests #####
245224# #### 1 GPU test #####
@@ -394,6 +373,7 @@ steps:
394373 - pytest -v -s compile/test_async_tp.py
395374 - pytest -v -s compile/test_fusion_all_reduce.py
396375 - pytest -v -s compile/test_decorator.py
376+ - pytest -v -s compile/test_noop_elimination.py
397377
398378- label : PyTorch Fullgraph Smoke Test # 15min
399379 timeout_in_minutes : 30
@@ -548,15 +528,6 @@ steps:
548528 commands : # LMEval+Transcription WER check
549529 - pytest -s entrypoints/openai/correctness/
550530
551- - label : Encoder Decoder tests # 12min
552- timeout_in_minutes : 20
553- mirror_hardwares : [amdexperimental]
554- source_file_dependencies :
555- - vllm/
556- - tests/encoder_decoder
557- commands :
558- - pytest -v -s encoder_decoder
559-
560531- label : OpenAI-Compatible Tool Use # 23 min
561532 timeout_in_minutes : 35
562533 mirror_hardwares : [amdexperimental]
@@ -817,7 +788,7 @@ steps:
817788 # Quantization
818789 - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
819790 - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
820- - pytest -v -s tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py
791+ - pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
821792 - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
822793 - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
823794 - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
@@ -829,6 +800,20 @@ steps:
829800 - pytest -v -s tests/kernels/moe/test_flashinfer.py
830801 - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
831802
803+ - label : GPT-OSS Eval (Blackwell)
804+ timeout_in_minutes : 60
805+ working_dir : " /vllm-workspace/"
806+ gpu : b200
807+ optional : true # disable while debugging
808+ source_file_dependencies :
809+ - tests/evals/gpt_oss
810+ - vllm/model_executor/models/gpt_oss.py
811+ - vllm/model_executor/layers/quantization/mxfp4.py
812+ - vllm/v1/attention/backends/flashinfer.py
813+ commands :
814+ - uv pip install --system 'gpt-oss[eval]==0.0.5'
815+ - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 --server-args '--tensor-parallel-size 2'
816+
832817# #### 1 GPU test #####
833818# #### multi gpus test #####
834819
@@ -954,7 +939,6 @@ steps:
954939 commands :
955940 - pytest -v -s distributed/test_pp_cudagraph.py
956941 - pytest -v -s distributed/test_pipeline_parallel.py
957- # - pytest -v -s distributed/test_context_parallel.py # TODO: enable it on Hopper runners or add triton MLA support
958942
959943- label : LoRA TP Test (Distributed) # 17 min
960944 timeout_in_minutes : 30
@@ -1028,9 +1012,21 @@ steps:
10281012 - export VLLM_WORKER_MULTIPROC_METHOD=spawn
10291013 - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
10301014
1031- - label : Qwen MoE EP Test # optional
1015+ # #### H200 test #####
1016+ - label : Distributed Tests (H200) # optional
10321017 gpu : h200
10331018 optional : true
1019+ working_dir : " /vllm-workspace/"
1020+ num_gpus : 2
1021+ commands :
1022+ - pytest -v -s tests/distributed/test_context_parallel.py
1023+ - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1024+
1025+ # #### B200 test #####
1026+ - label : Distributed Tests (B200) # optional
1027+ gpu : b200
1028+ optional : true
1029+ working_dir : " /vllm-workspace/"
10341030 num_gpus : 2
10351031 commands :
1036- - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1032+ - pytest -v -s tests/distributed/test_context_parallel.py