Skip to content

Commit d84bdb0

Browse files
panpan0000 authored and eicherseiji committed
Remove deprecated PyNcclConnector (vllm-project#24151)
Signed-off-by: Peter Pan <[email protected]>
1 parent a515fb7 commit d84bdb0

File tree

7 files changed

+15
-15
lines changed

7 files changed

+15
-15
lines changed

benchmarks/disagg_benchmarks/disagg_overhead_benchmark.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ benchmark() {
6262
--max-model-len 10000 \
6363
--gpu-memory-utilization 0.6 \
6464
--kv-transfer-config \
65-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
65+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
6666

6767

6868
CUDA_VISIBLE_DEVICES=1 python3 \
@@ -72,7 +72,7 @@ benchmark() {
7272
--max-model-len 10000 \
7373
--gpu-memory-utilization 0.6 \
7474
--kv-transfer-config \
75-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
75+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
7676

7777
wait_for_server 8100
7878
wait_for_server 8200

benchmarks/disagg_benchmarks/disagg_performance_benchmark.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ launch_disagg_prefill() {
6969
--max-model-len 10000 \
7070
--gpu-memory-utilization 0.6 \
7171
--kv-transfer-config \
72-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
72+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
7373

7474
CUDA_VISIBLE_DEVICES=1 python3 \
7575
-m vllm.entrypoints.openai.api_server \
@@ -78,7 +78,7 @@ launch_disagg_prefill() {
7878
--max-model-len 10000 \
7979
--gpu-memory-utilization 0.6 \
8080
--kv-transfer-config \
81-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
81+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
8282

8383
wait_for_server 8100
8484
wait_for_server 8200

examples/offline_inference/disaggregated_prefill.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ def run_prefill(prefill_done):
3030
]
3131
sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1)
3232

33-
# Using PyNcclConnector to transmit KV caches between vLLM instances.
33+
# Using P2pNcclConnector to transmit KV caches between vLLM instances.
3434
# This instance is the prefill node (kv_producer, rank 0).
3535
# The number of parallel instances for KV cache transfer is set to 2,
36-
# as required for PyNcclConnector.
36+
# as required for P2pNcclConnector.
3737
ktc = KVTransferConfig(
38-
kv_connector="PyNcclConnector",
38+
kv_connector="P2pNcclConnector",
3939
kv_role="kv_producer",
4040
kv_rank=0,
4141
kv_parallel_size=2,
@@ -74,12 +74,12 @@ def run_decode(prefill_done):
7474
]
7575
sampling_params = SamplingParams(temperature=0, top_p=0.95)
7676

77-
# Using PyNcclConnector to transmit KV caches between vLLM instances.
77+
# Using P2pNcclConnector to transmit KV caches between vLLM instances.
7878
# This instance is the decode node (kv_consumer, rank 1).
7979
# The number of parallel instances for KV cache transfer is set to 2,
80-
# as required for PyNcclConnector.
80+
# as required for P2pNcclConnector.
8181
ktc = KVTransferConfig(
82-
kv_connector="PyNcclConnector",
82+
kv_connector="P2pNcclConnector",
8383
kv_role="kv_consumer",
8484
kv_rank=1,
8585
kv_parallel_size=2,

examples/online_serving/disaggregated_prefill.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \
5353
--gpu-memory-utilization 0.8 \
5454
--trust-remote-code \
5555
--kv-transfer-config \
56-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2}' &
56+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2}' &
5757

5858
# decoding instance, which is the KV consumer
5959
CUDA_VISIBLE_DEVICES=1 vllm serve $MODEL_NAME \
@@ -62,7 +62,7 @@ CUDA_VISIBLE_DEVICES=1 vllm serve $MODEL_NAME \
6262
--gpu-memory-utilization 0.8 \
6363
--trust-remote-code \
6464
--kv-transfer-config \
65-
'{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2}' &
65+
'{"kv_connector":"P2pNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2}' &
6666

6767
# wait until prefill and decode instances are ready
6868
wait_for_server 8100

tests/kv_transfer/test_lookup_buffer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def stress_test(my_rank, buf, device):
128128
print(f"initialized! My rank is {my_rank}")
129129

130130
config = KVTransferConfig(
131-
kv_connector='PyNcclConnector',
131+
kv_connector='P2pNcclConnector',
132132
kv_buffer_device='cuda',
133133
kv_buffer_size=1e9,
134134
kv_rank=my_rank,

tests/kv_transfer/test_send_recv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def latency_test(my_rank, pipe, nelement, ntensor):
137137
)
138138

139139
config = KVTransferConfig(
140-
kv_connector='PyNcclConnector',
140+
kv_connector='P2pNcclConnector',
141141
kv_buffer_device='cuda',
142142
kv_buffer_size=1e9,
143143
kv_rank=my_rank,

vllm/config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3247,7 +3247,7 @@ class KVTransferConfig:
32473247

32483248
kv_parallel_size: int = 1
32493249
"""The number of parallel instances for KV cache transfer. For
3250-
PyNcclConnector, this should be 2."""
3250+
P2pNcclConnector, this should be 2."""
32513251

32523252
kv_ip: str = "127.0.0.1"
32533253
"""The KV connector ip, used to build distributed connection."""

0 commit comments

Comments
 (0)