Skip to content

Commit fed6b3b

Browse files
committed
Finish all send requests before quitting the PP event loop to avoid an MPI deadlock; synchronize the sampler right after async calls to avoid a hang.
Signed-off-by: Lizhi Zhou <[email protected]>
1 parent 6eb0d0b commit fed6b3b

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,6 @@ def _executor_loop_pp(self):
759 759   assert sample_state is not None, "Sampling failed"
760 760   sample_state.host.logits = logits_host
761 761   self._update_request_states(scheduled_batch)
762     - sample_state.sampler_event.synchronize()
763 762
764 763   if self.enable_iter_perf_stats:
765 764   iter_stats.inflight_batching_stats.num_ctx_tokens = self.model_engine.iter_states[
@@ -809,6 +808,7 @@ def _executor_loop_pp(self):
809 808   if not self.dist.is_second_last_pp_rank:
810 809   if self.send_handles[prev_microbatch_id] is not None:
811 810   self.send_handles[prev_microbatch_id].wait()
    811 + self.send_handles[prev_microbatch_id] = None
812 812   needs_logits = (
813 813   self._need_return_logits(scheduled_batch)
814 814   or (self._need_return_log_probs(scheduled_batch)

0 commit comments

Comments
 (0)