@@ -30,12 +30,12 @@ def run_prefill(prefill_done):
3030 ]
3131 sampling_params = SamplingParams (temperature = 0 , top_p = 0.95 , max_tokens = 1 )
3232
33- # Using PyNcclConnector to transmit KV caches between vLLM instances.
33+ # Using P2pNcclConnector to transmit KV caches between vLLM instances.
3434 # This instance is the prefill node (kv_producer, rank 0).
3535 # The number of parallel instances for KV cache transfer is set to 2,
36- # as required for PyNcclConnector .
36+ # as required for P2pNcclConnector.
3737 ktc = KVTransferConfig (
38- kv_connector = "PyNcclConnector " ,
38+ kv_connector = "P2pNcclConnector " ,
3939 kv_role = "kv_producer" ,
4040 kv_rank = 0 ,
4141 kv_parallel_size = 2 ,
@@ -74,12 +74,12 @@ def run_decode(prefill_done):
7474 ]
7575 sampling_params = SamplingParams (temperature = 0 , top_p = 0.95 )
7676
77- # Using PyNcclConnector to transmit KV caches between vLLM instances.
77+ # Using P2pNcclConnector to transmit KV caches between vLLM instances.
7878 # This instance is the decode node (kv_consumer, rank 1).
7979 # The number of parallel instances for KV cache transfer is set to 2,
80- # as required for PyNcclConnector .
80+ # as required for P2pNcclConnector.
8181 ktc = KVTransferConfig (
82- kv_connector = "PyNcclConnector " ,
82+ kv_connector = "P2pNcclConnector " ,
8383 kv_role = "kv_consumer" ,
8484 kv_rank = 1 ,
8585 kv_parallel_size = 2 ,
0 commit comments