Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions vllm/distributed/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import vllm.envs as envs
from vllm.logger import init_logger
from vllm.platforms import CpuArchEnum, Platform
from vllm.utils.network_utils import get_tcp_uri
from vllm.utils.system_utils import suppress_stdout

Expand All @@ -38,9 +39,15 @@
# We prefer to use os.sched_yield as it results in tighter polling loops,
# measured to be around 3e-7 seconds. However on earlier versions of Python
# os.sched_yield() does not release the GIL, so we fall back to time.sleep(0)
# True on interpreter releases 3.10.8+ within the 3.10 series, or 3.11.1 and
# newer; False otherwise (callers then fall back to time.sleep(0)).
USE_SCHED_YIELD = (
    sys.version_info[:3] >= (3, 11, 1)
    or (3, 10, 8) <= sys.version_info[:3] < (3, 11)
)
#
# On Arm systems, os.sched_yield() does not take effect: the GIL (Global
# Interpreter Lock) is not released, which results in CPU-bound issues.
# On Arm we therefore make the process call time.sleep(0) instead, so that
# the GIL is released.
USE_SCHED_YIELD = (
    # Interpreter gate: 3.11.1 and newer, or 3.10.8+ within the 3.10 series.
    (
        sys.version_info[:3] >= (3, 11, 1)
        or (3, 10, 8) <= sys.version_info[:3] < (3, 11)
    )
    # Platform gate: never use os.sched_yield() on Arm CPUs.
    and Platform.get_cpu_architecture() != CpuArchEnum.ARM
)


def sched_yield():
Expand Down
Loading