Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
test/srt/run_suite.py — 89 changes: 25 additions & 64 deletions
Columns: original file line number | diff line number | diff line change
@@ -230,6 +230,7 @@
"nightly-8-gpu-h20": [],
"__not_in_ci__": [
TestFile("ascend/test_ascend_w8a8_quantization.py"),
TestFile("ascend/test_mindspore_models.py"),
TestFile("cpu/test_comm.py"),
TestFile("debug_utils/test_log_parser.py", 5),
TestFile("test_deepseek_v3_cutedsl_4gpu.py"),
@@ -242,8 +243,6 @@
TestFile("ep/test_moe_deepep.py"),
TestFile("ep/test_moe_deepep_eval_accuracy_large.py"),
TestFile("hicache/test_disaggregation_hicache.py"),
TestFile("hicache/test_hicache_storage_benchmark.py"),
TestFile("hicache/test_hicache_storage_e2e.py"),
TestFile("layers/attention/nsa/test_act_quant_triton.py"),
TestFile("layers/moe/test_moe_runners.py"),
TestFile("lora/test_chunked_sgmv_backend.py"),
@@ -258,120 +257,61 @@
TestFile("models/test_llama4_models.py"),
TestFile("models/test_mtp_models.py"),
TestFile("models/test_unsloth_models.py"),
TestFile("openai/test_server.py"),
TestFile("openai_server/features/test_cache_report.py"),
TestFile("openai_server/features/test_continuous_usage_stats.py"),
TestFile("openai_server/features/test_structural_tag.py"),
TestFile("quant/test_fp8_kvcache.py"),
TestFile("rl/test_verl_engine_2_gpu.py"),
TestFile("rl/test_verl_engine_4_gpu.py"),
TestFile("test_ascend_attention_backend.py"),
TestFile("test_ascend_mla_backend.py"),
TestFile("test_ascend_mla_w8a8int8.py"),
TestFile("test_ascend_tp1_bf16.py"),
TestFile("test_ascend_tp2_bf16.py"),
TestFile("test_ascend_w8a8_quantization.py"),
TestFile("test_async_dynamic_batch_tokenizer.py"),
TestFile("test_async_mm_data_processor.py"),
TestFile("test_awq.py"),
TestFile("test_awq_dequant.py"),
TestFile("test_bench_one_batch.py"),
TestFile("test_bench_serving.py"),
TestFile("test_block_int8.py"),
TestFile("test_cache_report.py"),
TestFile("test_config_integration.py"),
TestFile("test_cpp_radix_cache.py"),
TestFile("test_cpu_graph.py"),
TestFile("test_custom_allreduce.py"),
TestFile("test_cutedsl_flashinfer_8gpu.py"),
TestFile("test_deepep_internode.py"),
TestFile("test_deepep_intranode.py"),
TestFile("test_deepep_large.py"),
TestFile("test_deepep_low_latency.py"),
TestFile("test_deepep_small.py"),
TestFile("test_deepseek_chat_templates.py"),
TestFile("test_disaggregation.py"),
TestFile("test_double_sparsity.py"),
TestFile("test_eagle_infer_beta_dp_attention.py"),
TestFile("test_embedding_openai_server.py"),
TestFile("test_enable_thinking.py"),
TestFile("test_eplb.py"),
TestFile("test_eval_accuracy_large.py"),
TestFile("test_expert_distribution.py"),
TestFile("test_expert_location_updater.py"),
TestFile("test_fim_completion.py"),
TestFile("test_forward_split_prefill.py"),
TestFile("test_fp8_kernel.py"),
TestFile("test_fp8_kvcache.py"),
TestFile("test_full_deepseek_v3.py"),
TestFile("test_get_weights_by_name.py"),
TestFile("test_gpt_oss_common.py"),
TestFile("test_health_check.py"),
TestFile("test_hicache_storage.py"),
TestFile("test_hicache_variants.py"),
TestFile("test_hybrid_dp_ep_tp_mtp.py"),
TestFile("test_int4_kernel.py"),
TestFile("test_int8_kernel.py"),
TestFile("test_intel_amx_attention_backend.py"),
TestFile("test_constrained_decoding.py"),
TestFile("test_json_mode.py"),
TestFile("test_kv_events.py"),
TestFile("test_large_max_new_tokens.py"),
TestFile("test_logprobs.py"),
TestFile("test_lookahead_speculative_decoding.py"),
TestFile("test_matched_stop.py"),
TestFile("test_mla_tp.py"),
TestFile("test_modelopt.py"),
TestFile("test_modelopt_fp8kvcache.py"),
TestFile("test_models_from_modelscope.py"),
TestFile("test_moe_deepep.py"),
TestFile("test_moe_deepep_eval_accuracy_large.py"),
TestFile("test_moe_ep.py"),
TestFile("test_moe_eval_accuracy_large.py"),
TestFile("test_mscclpp.py"),
TestFile("nightly/test_deepseek_v31_perf.py"),
TestFile("nightly/test_deepseek_v32_perf.py"),
TestFile("nightly/test_gpt_oss_4gpu_perf.py"),
TestFile("nightly/test_gsm8k_eval_amd.py"),
TestFile("nightly/test_text_models_gsm8k_eval.py"),
TestFile("nightly/test_text_models_perf.py"),
TestFile("nightly/test_vlms_mmmu_eval.py"),
TestFile("nightly/test_vlms_perf.py"),
TestFile("test_openai_adapter.py"),
TestFile("test_openai_function_calling.py"),
TestFile("test_openai_server.py"),
TestFile("test_openai_server_hidden_states.py"),
TestFile("test_piecewise_cuda_graph.py"),
TestFile("test_quick_allreduce.py"),
TestFile("test_reasoning_content.py"),
TestFile("test_request_length_validation.py"),
TestFile("test_sagemaker_server.py"),
TestFile("test_schedule_policy.py"),
TestFile("test_session_control.py"),
TestFile("test_srt_engine_with_quant_args.py"),
TestFile("test_tokenizer_batch_encode.py"),
TestFile("test_tokenizer_manager.py"),
TestFile("test_tool_choice.py"),
TestFile("test_torch_flex_attention_backend.py"),
TestFile("test_torch_tp.py"),
TestFile("test_tracing.py"),
TestFile("test_triton_attention_rocm_mla.py"),
TestFile("test_triton_fused_moe.py"),
TestFile("test_triton_moe_wna16.py"),
TestFile("test_two_batch_overlap.py"),
TestFile("test_update_weights_from_disk.py"),
TestFile("test_update_weights_from_distributed.py"),
TestFile("test_update_weights_from_tensor.py"),
TestFile("test_verl_engine_2_gpu.py"),
TestFile("test_verl_engine_4_gpu.py"),
TestFile("test_verl_engine_server.py"),
TestFile("test_vertex_endpoint.py"),
# TestFile("test_vision_openai_server_a.py"), # TODO: Fix timeout
TestFile("test_vision_openai_server_b.py"),
TestFile("test_vision_openai_server_common.py"),
TestFile("test_vlm_accuracy.py"),
TestFile("test_w4a8.py"),
TestFile("test_w8a8_quantization.py"),
TestFile("test_wave_attention_backend.py"),
TestFile("test_weight_version.py"),
TestFile("test_deepseek_v32_cp_single_node.py", 275),
@@ -537,9 +477,6 @@
TestFile("ascend/test_ascend_deepep.py", 400),
TestFile("ascend/test_ascend_deepseek_mtp.py", 400),
],
"__not_in_ascend_ci__": [
TestFile("ascend/test_mindspore_models.py"),
],
}

suites.update(suite_amd)
@@ -614,6 +551,30 @@ def _sanity_check_suites(suites):
f"{missing_text}"
)

nonexistent_files = sorted(list(suite_files - disk_files))
nonexistent_text = "\n".join(f'TestFile("{x}"),' for x in nonexistent_files)
assert (
len(nonexistent_files) == 0
), f"Some test files in test suite do not exist on disk:\n{nonexistent_text}"

not_in_ci_files = set(
[test_file.name for test_file in suites.get("__not_in_ci__", [])]
)
in_ci_files = set(
[
test_file.name
for suite_name, suite in suites.items()
if suite_name != "__not_in_ci__"
for test_file in suite
]
)
intersection = not_in_ci_files & in_ci_files
intersection_text = "\n".join(f'TestFile("{x}"),' for x in intersection)
assert len(intersection) == 0, (
f"Some test files are in both `not_in_ci` section and other suites:\n"
f"{intersection_text}"
)


def main():
arg_parser = argparse.ArgumentParser()
Expand Down
Loading