diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index bf03fc88d82..d4b93057ead 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -985,7 +985,7 @@ def test_chunked_prefill(self): @skip_pre_blackwell -@pytest.mark.timeout(DEFAULT_TEST_TIMEOUT) +@pytest.mark.timeout(3600) class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness): FP4_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf" FP8_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf" diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 60fbffab27e..5d7d248d813 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -1289,8 +1289,7 @@ def run_disaggregated_benchmark(example_dir, random_input_len=16, random_output_len=64, num_prompts=100, - max_concurrency=32, - skip_warmup=False): + max_concurrency=32): """Run disaggregated test with given configuration.""" run_env = env.copy() run_env["UCX_TLS"] = "^ib" @@ -1320,7 +1319,7 @@ def run_disaggregated_benchmark(example_dir, stderr=subprocess.STDOUT, env=run_env, cwd=cwd) as server_proc): - # Ensure the sever has started + client_dir = f"{example_dir}/clients" client_cmd = [ 'python3', f'{client_dir}/disagg_client.py', '-c', @@ -1329,7 +1328,7 @@ def run_disaggregated_benchmark(example_dir, '--server-start-timeout', str(server_start_timeout) ] - # Warm up + # Ensure the server has started and workers are ready check_call(client_cmd, env=env, poll_procs=[workers_proc, server_proc]) @@ -1366,9 +1365,6 @@ def run_disaggregated_benchmark(example_dir, '--percentile-metrics', 'e2el,ttft', ] - # warm up - if not skip_warmup: - check_call(benchmark_cmd, env=env) output = 
check_output(benchmark_cmd, env=env) e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)" ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)" @@ -1513,8 +1509,7 @@ def test_disaggregated_deepseek_v3_lite_bf16_empty_batch( num_prompts=10, max_concurrency=10, random_input_len=384, - random_output_len=1536, - skip_warmup=True) + random_output_len=1536) print(f"E2EL: {e2el} ms, TTFT: {ttft} ms") assert e2el > 0 and ttft > 0 diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 4d3c94e452e..e2950eb50a3 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -338,7 +338,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[4-0-disableDecoupleMode-te triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874) triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874) cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670) -disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682) disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574) accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm-fp8] SKIP (https://nvbugs/5608790) full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)