diff --git a/tensorrt_llm/commands/serve.py b/tensorrt_llm/commands/serve.py
index 5b91b110ede..3320abc6ad8 100644
--- a/tensorrt_llm/commands/serve.py
+++ b/tensorrt_llm/commands/serve.py
@@ -29,7 +29,7 @@
                                               parse_disagg_config_file,
                                               parse_metadata_server_config_file)
 from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_dict
-from tensorrt_llm.llmapi.mpi_session import find_free_port
+from tensorrt_llm.llmapi.mpi_session import find_free_ipc_addr
 from tensorrt_llm.llmapi.reasoning_parser import ReasoningParserFactory
 from tensorrt_llm.logger import logger, severity_map
 from tensorrt_llm.serve import OpenAIDisaggServer, OpenAIServer
@@ -722,10 +722,10 @@ def _launch_disaggregated_leader(sub_comm, instance_idx: int, config_file: str,
 
     # This mimics the behavior of trtllm-llmapi-launch
     # TODO: Make the port allocation atomic
-    free_port = find_free_port()
+    free_ipc_addr = find_free_ipc_addr()
     os.environ[LlmLauncherEnvs.TLLM_SPAWN_PROXY_PROCESS] = "1"
-    os.environ[LlmLauncherEnvs.TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR.
-               value] = f"tcp://127.0.0.1:{free_port}"
+    os.environ[
+        LlmLauncherEnvs.TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR.value] = free_ipc_addr
     os.environ[DisaggLauncherEnvs.TLLM_DISAGG_RUN_REMOTE_MPI_SESSION_CLIENT.
                value] = "1"
     os.environ[DisaggLauncherEnvs.TLLM_DISAGG_INSTANCE_IDX] = str(instance_idx)
diff --git a/tensorrt_llm/llmapi/mpi_session.py b/tensorrt_llm/llmapi/mpi_session.py
index f0275d7f90a..d32e5a7b7aa 100644
--- a/tensorrt_llm/llmapi/mpi_session.py
+++ b/tensorrt_llm/llmapi/mpi_session.py
@@ -541,6 +541,13 @@ def find_free_port() -> int:
         return s.getsockname()[1]
 
 
+def find_free_ipc_addr() -> str:
+    import os
+    import tempfile
+    import uuid
+    return f'ipc://{os.path.join(tempfile.gettempdir(), "rpc_" + str(uuid.uuid4()))}'
+
+
 def get_mpi_world_size() -> int:
     # avoid cyclic import
     from ..executor.utils import get_spawn_proxy_process_env