From bbb077493cd00db75989e3494e101548210f6840 Mon Sep 17 00:00:00 2001 From: Tsundoku958 Date: Fri, 19 Sep 2025 06:31:05 +0000 Subject: [PATCH 1/2] fix bug: dp+tp warmup --- lmdeploy/pytorch/engine/model_agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lmdeploy/pytorch/engine/model_agent.py b/lmdeploy/pytorch/engine/model_agent.py index 7b332b6f0c..03483c4977 100644 --- a/lmdeploy/pytorch/engine/model_agent.py +++ b/lmdeploy/pytorch/engine/model_agent.py @@ -401,6 +401,7 @@ def warmup(self): is_decoding=False, device='cuda', vocab_size=self.model_config.vocab_size) + inputs.build_dp_meta() self._forward_impl(inputs) # warmup decoding(with cuda graph) @@ -411,6 +412,7 @@ def warmup(self): is_decoding=True, device='cuda', vocab_size=self.model_config.vocab_size) + inputs.build_dp_meta() self._forward_impl(inputs) def _slice_outs(self, inputs: torch.Tensor, seq_length: torch.LongTensor): From 54eabb38b7e505c92f992c3004739ee6044283f1 Mon Sep 17 00:00:00 2001 From: Tsundoku958 Date: Fri, 19 Sep 2025 10:12:30 +0000 Subject: [PATCH 2/2] assert dp size --- lmdeploy/pytorch/engine/model_agent.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lmdeploy/pytorch/engine/model_agent.py b/lmdeploy/pytorch/engine/model_agent.py index 03483c4977..2f107e18bd 100644 --- a/lmdeploy/pytorch/engine/model_agent.py +++ b/lmdeploy/pytorch/engine/model_agent.py @@ -395,13 +395,15 @@ def warmup(self): with self.all_context(): max_batches = self.cache_config.max_batches num_tokens = max_batches - + dist_ctx = get_dist_manager().current_context() + dp = dist_ctx.dp # warmup prefill inputs = self.inputs_strategy.make_dummy(max_batches, is_decoding=False, device='cuda', vocab_size=self.model_config.vocab_size) - inputs.build_dp_meta() + if dp > 1: + inputs.build_dp_meta() self._forward_impl(inputs) # warmup decoding(with cuda graph) @@ -412,7 +414,8 @@ def warmup(self): is_decoding=True, device='cuda', vocab_size=self.model_config.vocab_size) - inputs.build_dp_meta() + if dp > 1: + inputs.build_dp_meta() self._forward_impl(inputs) def _slice_outs(self, inputs: torch.Tensor, seq_length: torch.LongTensor):