diff --git a/vllm_omni/entrypoints/omni_stage.py b/vllm_omni/entrypoints/omni_stage.py index 4755b2384b2..e1a3c5cf43a 100644 --- a/vllm_omni/entrypoints/omni_stage.py +++ b/vllm_omni/entrypoints/omni_stage.py @@ -698,6 +698,12 @@ def _stage_worker( in_q = create_zmq_queue(zmq_ctx, in_q, zmq.PULL) if isinstance(out_q, str): out_q = create_zmq_queue(zmq_ctx, out_q, zmq.PUSH) + # When using ZMQ (cross-node IPC), disable SHM so data is sent inline. + shm_threshold_bytes = sys.maxsize + logger.info( + "[Stage-%s] ZMQ transport detected; disabling SHM IPC (shm_threshold_bytes set to maxsize)", + stage_id, + ) # Aggregates for running average _agg_total_tokens = 0 @@ -1092,6 +1098,12 @@ async def _stage_worker_async( in_q = create_zmq_queue(zmq_ctx, in_q, zmq.PULL) if isinstance(out_q, str): out_q = create_zmq_queue(zmq_ctx, out_q, zmq.PUSH) + # When using ZMQ (cross-node IPC), disable SHM so data is sent inline. + shm_threshold_bytes = sys.maxsize + logger.info( + "[Stage-%s] ZMQ transport detected; disabling SHM IPC (shm_threshold_bytes set to maxsize)", + stage_id, + ) # Aggregates for running average _agg_total_tokens = 0