From ebeee7ef0b493a85a07ffbc076d6ad037be2c939 Mon Sep 17 00:00:00 2001
From: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
Date: Tue, 25 Nov 2025 11:03:58 +0000
Subject: [PATCH] Mitigate test timeout issues

Set TRTLLM_USE_UCX_KVCACHE=1 in the environment of every context server
and every generation server started by launch_disaggregated_llm.

Drop the max_batch_size=1 override from the context and generation
server configs in test_chunked_prefill; per the removed comment, that
setting stabilized the accuracy result at a cost of speed.

Signed-off-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
---
 .../defs/accuracy/test_disaggregated_serving.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py
index 6f52c6e7dbe..bace0febe0a 100644
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -146,6 +146,7 @@ def launch_disaggregated_llm(
 
     for i, port in enumerate(ctx_ports):
         env_ctx = os.environ.copy()
+        env_ctx["TRTLLM_USE_UCX_KVCACHE"] = "1"
         gpu_range = range(current_gpu_offset,
                           current_gpu_offset + ctx_total_gpus)
         env_ctx["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range))
@@ -166,6 +167,7 @@ def launch_disaggregated_llm(
 
     for i, port in enumerate(gen_ports):
         env_gen = os.environ.copy()
+        env_gen["TRTLLM_USE_UCX_KVCACHE"] = "1"
         gpu_range = range(current_gpu_offset,
                           current_gpu_offset + gen_total_gpus)
         env_gen["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_range))
@@ -1103,15 +1105,12 @@ def test_chunked_prefill(self):
             },
             "enable_chunked_prefill": True,
             "max_num_tokens": 256,
-            "max_batch_size":
-            1,  # max_batch_size=1 will stabilize the accuracy test result at a cost of speed
         }
         gen_server_config = {
             "cuda_graph_config": None,
             "cache_transceiver_config": {
                 "backend": "DEFAULT"
-            },
-            "max_batch_size": 1,
+            }
         }
         disaggregated_server_config = {
             "hostname": "localhost",