From 04c8191225c92412c052a2dac7582d60e1ae9855 Mon Sep 17 00:00:00 2001
From: yasong
Date: Tue, 28 Apr 2026 05:14:40 +0000
Subject: [PATCH] [Bugfix] Exclude numa_bind fields from ParallelConfig DP hash

When `numa_bind=True` and `numa_bind_nodes` is left unset, each DP rank
auto-detects its own NUMA node and writes it back into ParallelConfig
(see `_get_numa_node` in vllm/utils/numa_utils.py). Because
`numa_bind_nodes` is a hashed factor in `ParallelConfig.compute_hash`,
ranks on different NUMA nodes then produce different hashes and the DP
worker configuration consistency check fails at startup with:

    RuntimeError: Configuration mismatch detected for engine N. All DP
    workers must have identical configurations for parameters that
    affect collective communication ...

NUMA binding only affects host-side memory locality and is
intentionally per-rank, so it does not influence
collective-communication semantics. Add `numa_bind`, `numa_bind_nodes`,
and `numa_bind_cpus` to the `ignored_factors` set so DP ranks with
different per-rank NUMA bindings hash identically.

Reproduced on GB300 (4 NUMA nodes, 1 worker per node, DP=4) with
`--numa-bind` and no explicit `--numa-bind-nodes`.

Signed-off-by: yasong
---
 vllm/config/parallel.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index afd0d1dd501a..4f903eeefa6d 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -713,6 +713,14 @@ def compute_hash(self):
             "worker_extension_cls",
             "_api_process_count",
             "_api_process_rank",
+            # NUMA binding is per-rank host-side memory locality; it does
+            # not affect collective-communication semantics. When numa_bind
+            # is enabled with auto-detection, each DP rank stores its own
+            # NUMA node in numa_bind_nodes (see vllm/utils/numa_utils.py
+            # `_get_numa_node`), which would otherwise diverge the DP hash.
+            "numa_bind",
+            "numa_bind_nodes",
+            "numa_bind_cpus",
         }
 
         from vllm.config.utils import get_hash_factors, hash_factors