diff --git a/release/llm_tests/serve/test_llm_serve_multi_node_integration.py b/release/llm_tests/serve/test_llm_serve_multi_node_integration.py index 4e4e0dfc2b32..07ff92eb4f6b 100644 --- a/release/llm_tests/serve/test_llm_serve_multi_node_integration.py +++ b/release/llm_tests/serve/test_llm_serve_multi_node_integration.py @@ -3,13 +3,11 @@ import ray from ray import serve from ray.serve.llm import ( + build_dp_deployment, + build_openai_app, LLMConfig, LLMServingArgs, ModelLoadingConfig, - build_openai_app, -) -from ray.llm._internal.serve.serving_patterns.data_parallel.dp_server import ( - build_dp_deployment, ) @@ -100,6 +98,9 @@ def test_llm_serve_data_parallelism(): max_num_batched_tokens=256, enforce_eager=True, ), + experimental_configs=dict( + dp_size_per_node=2, + ), placement_group_config=placement_group_config, runtime_env={"env_vars": {"VLLM_DISABLE_COMPILE_CACHE": "1"}}, )