@@ -985,7 +985,7 @@ def test_chunked_prefill(self):


@skip_pre_blackwell
-@pytest.mark.timeout(DEFAULT_TEST_TIMEOUT)
+@pytest.mark.timeout(3600)
class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
FP4_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf"
FP8_MODEL = f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf"
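For context, a minimal sketch of what the changed decorator does (assuming the pytest-timeout plugin, which provides pytest.mark.timeout, is installed and the suite runs under pytest). The test name and body below are hypothetical, for illustration only; in the diff the marker is applied to the whole TestQwen3_30B_A3B class, capping each test in it at a fixed 3600-second wall-clock limit instead of the shared DEFAULT_TEST_TIMEOUT constant.

import time

import pytest


@pytest.mark.timeout(3600)  # fail the test if it runs longer than 3600 seconds
def test_long_running_job():  # hypothetical test, for illustration only
    time.sleep(1)  # stands in for the real workload
    assert True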
13 changes: 4 additions & 9 deletions tests/integration/defs/disaggregated/test_disaggregated.py
@@ -1289,8 +1289,7 @@ def run_disaggregated_benchmark(example_dir,
random_input_len=16,
random_output_len=64,
num_prompts=100,
-max_concurrency=32,
-skip_warmup=False):
+max_concurrency=32):
"""Run disaggregated test with given configuration."""
run_env = env.copy()
run_env["UCX_TLS"] = "^ib"
@@ -1320,7 +1319,7 @@ def run_disaggregated_benchmark(example_dir,
stderr=subprocess.STDOUT,
env=run_env,
cwd=cwd) as server_proc):
-# Ensure the sever has started
+
client_dir = f"{example_dir}/clients"
client_cmd = [
'python3', f'{client_dir}/disagg_client.py', '-c',
Expand All @@ -1329,7 +1328,7 @@ def run_disaggregated_benchmark(example_dir,
'--server-start-timeout',
str(server_start_timeout)
]
-# Warm up
+# Ensure the server has started and workers are ready
check_call(client_cmd,
env=env,
poll_procs=[workers_proc, server_proc])
@@ -1366,9 +1365,6 @@ def run_disaggregated_benchmark(example_dir,
'--percentile-metrics',
'e2el,ttft',
]
-# warm up
-if not skip_warmup:
-    check_call(benchmark_cmd, env=env)
output = check_output(benchmark_cmd, env=env)
e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)"
ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)"
@@ -1513,8 +1509,7 @@ def test_disaggregated_deepseek_v3_lite_bf16_empty_batch(
num_prompts=10,
max_concurrency=10,
random_input_len=384,
-random_output_len=1536,
-skip_warmup=True)
+random_output_len=1536)
print(f"E2EL: {e2el} ms, TTFT: {ttft} ms")

assert e2el > 0 and ttft > 0
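For reference, a minimal standalone sketch of the metric extraction that run_disaggregated_benchmark performs with the regex patterns shown above. The sample_output string here is fabricated purely for illustration; in the real helper the text comes from the benchmark command's stdout, and the final assertion mirrors the check in the test above.

import re

# Hypothetical benchmark output, for illustration only.
sample_output = """
Median TTFT (ms): 123.4
Median E2EL (ms): 567.8
"""

e2el_pattern = r"Median E2EL \(ms\):\s*(\d+\.?\d*)"
ttft_pattern = r"Median TTFT \(ms\):\s*(\d+\.?\d*)"

e2el = float(re.search(e2el_pattern, sample_output).group(1))
ttft = float(re.search(ttft_pattern, sample_output).group(1))
assert e2el > 0 and ttft > 0  # mirrors the assertion in the test above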
1 change: 0 additions & 1 deletion tests/integration/test_lists/waives.txt
@@ -338,7 +338,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[4-0-disableDecoupleMode-te
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-disableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-tensorrt_llm] SKIP (https://nvbugs/5461874)
cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670)
-disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682)
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574)
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm-fp8] SKIP (https://nvbugs/5608790)
full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)