
Commit eee58cb: remove debug comments

Parent: 96d04a5
6 files changed: 8 additions, 71 deletions

tensorrt_llm/_torch/models/modeling_deepseekv3.py
1 addition, 1 deletion

@@ -167,7 +167,7 @@ def forward(self,
         from ..distributed import allgather
         import os
         from tensorrt_llm.mapping import Mapping
-        lm_tp_size = int(os.getenv('LM_TP_SIZE', 8))
+        lm_tp_size = int(os.getenv('LM_TP_SIZE', 2))
         assert self.model_config.mapping.tp_size % lm_tp_size == 0
         lm_pp_size = self.model_config.mapping.pp_size * self.model_config.mapping.tp_size // lm_tp_size
         mapping_lm_tp = Mapping(
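
For reference, a minimal standalone sketch of the arithmetic in this hunk: the LM-head TP size is read from the LM_TP_SIZE environment variable (now defaulting to 2), and the remaining ranks are folded into a pipeline dimension. ToyMapping and lm_head_parallel_sizes are hypothetical names used only for illustration; the real code uses tensorrt_llm.mapping.Mapping.

```python
import os
from dataclasses import dataclass


@dataclass
class ToyMapping:
    """Stand-in for tensorrt_llm.mapping.Mapping (illustrative only)."""
    tp_size: int
    pp_size: int
    rank: int


def lm_head_parallel_sizes(mapping: ToyMapping) -> tuple:
    # Mirror of the diff above: LM_TP_SIZE defaults to 2, and the ranks not
    # used for LM-head tensor parallelism become a pipeline dimension.
    lm_tp_size = int(os.getenv('LM_TP_SIZE', 2))
    assert mapping.tp_size % lm_tp_size == 0
    lm_pp_size = mapping.pp_size * mapping.tp_size // lm_tp_size
    return lm_tp_size, lm_pp_size


print(lm_head_parallel_sizes(ToyMapping(tp_size=8, pp_size=1, rank=0)))  # (2, 4)
```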

tensorrt_llm/_torch/models/modeling_speculative.py
0 additions, 5 deletions

@@ -433,11 +433,6 @@ def forward(
             attn_metadata,
             True,
         )
-        # print(f"lm_head.weight.data_ptr: {self.lm_head.weight.data_ptr()}")
-        # print(f"lm_head.weight.shape: {self.lm_head.weight.shape}")
-        # print(f"In SpecDecOneEngineForCausalLM, before spec_worker, logits.shape: {logits.shape}")
-        # print(f"draft_model.lm_head.weight.data_ptr: {self.draft_model.lm_head.weight.data_ptr()}")
-        # print(f"draft_model.lm_head.weight.shape: {self.draft_model.lm_head.weight.shape}")
         # get accepted tokens and next draft tokens
         return self.spec_worker(input_ids=input_ids,
                                 position_ids=position_ids,

tensorrt_llm/_torch/models/modeling_utils.py
0 additions, 2 deletions

@@ -353,14 +353,12 @@ def __init__(self, model: TModel, *, config: ModelConfig[TConfig],
         self.has_custom_lm_head = False

         if config.mapping.enable_attention_dp and not getattr(config.mapping, 'enable_lm_tp_in_adp', False):
-            print(f"In DecoderModelForCausalLM, creating LMHead without TP")
             self.lm_head = LMHead(
                 vocab_size,
                 hidden_size,
                 dtype=config.pretrained_config.torch_dtype,
             )
         else:
-            print(f"In DecoderModelForCausalLM, creating LMHead with TP")
             # TODO(zhenhuanc): Currently lm_head Linear will not accept QuantConfig
             # will considering per layer QuantConfig in the future.
             if (hasattr(config, 'lora_config')
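
A condensed sketch of the branch the removed prints were annotating: under attention data parallelism the LM head is created without tensor parallelism unless enable_lm_tp_in_adp opts it back in. The helper name is hypothetical and `mapping` is any object exposing the two flags.

```python
def builds_lm_head_without_tp(mapping) -> bool:
    # Matches the `if` guard in DecoderModelForCausalLM.__init__ shown above:
    # ADP enabled and LM TP in ADP not requested -> plain (non-TP) LMHead.
    return mapping.enable_attention_dp and not getattr(
        mapping, 'enable_lm_tp_in_adp', False)
```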

tensorrt_llm/_torch/modules/embedding.py
4 additions, 5 deletions

@@ -37,11 +37,10 @@ def __init__(
         local_out_features = num_embeddings
         mapping = mapping or Mapping()
         if (mapping.enable_attention_dp and
-                getattr(mapping, 'enable_lm_tp_in_adp', False)) and os.getenv('LM_TP_SIZE') is not None:
-            lm_tp_size = int(os.getenv('LM_TP_SIZE', 8))
+                getattr(mapping, 'enable_lm_tp_in_adp', False)):
+            lm_tp_size = int(os.getenv('LM_TP_SIZE', 2))
             assert mapping.tp_size % lm_tp_size == 0, f"mapping.tp_size % lm_tp_size == 0, {mapping.tp_size} % {lm_tp_size} != 0"
             lm_pp_size = mapping.pp_size * mapping.tp_size // lm_tp_size
-            # print(f"In LMHead, mapping.tp_group: {mapping.tp_group}")
             mapping = Mapping(
                 world_size=lm_tp_size * lm_pp_size,
                 rank=mapping.rank,

@@ -51,7 +50,6 @@ def __init__(
                 enable_attention_dp=mapping.enable_attention_dp,
                 enable_lm_tp_in_adp=mapping.enable_lm_tp_in_adp,
             )
-            print(f"In LMHead, mapping_lm_tp.tp_group: {mapping.tp_group}")

         tp_size = mapping.tp_size

@@ -104,7 +102,8 @@ def forward(
         all_reduce_params: Optional[AllReduceParams] = None,
         is_mtp_head: bool = False,
     ) -> torch.Tensor:
-        if is_mtp_head and getattr(self.mapping, 'enable_lm_tp_in_adp', False):
+        if is_mtp_head and (self.mapping.enable_attention_dp and
+                            getattr(self.mapping, 'enable_lm_tp_in_adp', False)):
             tp_rank = self.mapping.tp_rank
             tp_size = self.mapping.tp_size
             tensor_shape = self.weight.shape
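
The forward-path change above tightens the MTP-head special case so it also requires attention data parallelism, not just the enable_lm_tp_in_adp flag. A small illustration of the new predicate, with SimpleNamespace standing in for the real Mapping and the helper name chosen only for this example:

```python
from types import SimpleNamespace


def takes_mtp_head_lm_tp_path(is_mtp_head: bool, mapping) -> bool:
    # New guard from the hunk above: both flags must be set.
    return is_mtp_head and (mapping.enable_attention_dp and
                            getattr(mapping, 'enable_lm_tp_in_adp', False))


# With ADP disabled the special path is now skipped, even if the LM-TP flag is set.
m = SimpleNamespace(enable_attention_dp=False, enable_lm_tp_in_adp=True)
assert not takes_mtp_head_lm_tp_path(True, m)
```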

tensorrt_llm/_torch/modules/logits_processor.py
1 addition, 45 deletions

@@ -9,9 +9,8 @@

 class LogitsProcessor(nn.Module):

-    def __init__(self, model_config: ModelConfig):
+    def __init__(self):
         super().__init__()
-        self.model_config = model_config

     def forward(self,
                 hidden_states: torch.Tensor,

@@ -30,49 +29,6 @@ def forward(self,
         else:
             hidden_states = hidden_states[-1]

-        # token_count = hidden_states.view(-1, hidden_states.shape[-1]).shape[0]
-
-        # # Add pre-lm gather logic
-        # if (self.model_config.mapping.enable_attention_dp and getattr(
-        #         self.model_config.mapping, 'enable_lm_tp_in_adp', False)):
-        #     # ADP + LM TP mode: perform All-Gather before LM_head
-        #     from ..distributed import allgather
-        #     all_rank_max_num_tokens = attn_metadata.all_rank_max_num_tokens
-        #     pad_len = all_rank_max_num_tokens - token_count
-        #     if pad_len > 0:
-        #         padded_hidden_states = F.pad(hidden_states.view(
-        #             -1, hidden_states.shape[-1]), (0, 0, 0, pad_len),
-        #             mode="constant",
-        #             value=0)
-        #     else:
-        #         padded_hidden_states = hidden_states.view(
-        #             -1, hidden_states.shape[-1])
-        #     hidden_states = allgather(padded_hidden_states,
-        #                               self.model_config.mapping,
-        #                               dim=0)
-
-        # # Temporarily disable gather_output when not in ADP mode or (in ADP mode and LM TP is enabled)
-        # if (not self.model_config.mapping.enable_attention_dp) or (
-        #         self.model_config.mapping.enable_attention_dp and getattr(
-        #             self.model_config.mapping, 'enable_lm_tp_in_adp', False)):
-        #     lm_head.gather_output = False
         logits = lm_head(hidden_states)
-        # if (not self.model_config.mapping.enable_attention_dp) or (
-        #         self.model_config.mapping.enable_attention_dp and getattr(
-        #             self.model_config.mapping, 'enable_lm_tp_in_adp', False)):
-        #     lm_head.gather_output = True
-
-        # if (self.model_config.mapping.enable_attention_dp and getattr(
-        #         self.model_config.mapping, 'enable_lm_tp_in_adp', False)):
-        #     # print(f"In LogitsProcessor, lm_head.weight.data_ptr: {lm_head.weight.data_ptr()}")
-        #     # print(f"In LogitsProcessor, lm_head.weight.shape: {lm_head.weight.shape}")
-        #     # print(f"In LogitsProcessor, logits.shape: {logits.shape}")
-        #     logits = allgather(logits, self.model_config.mapping, dim=-1)
-        #     batch_size = logits.shape[0]
-        #     local_batch_size = batch_size // self.model_config.mapping.tp_size
-        #     logits = logits.view(self.model_config.mapping.tp_size,
-        #                          local_batch_size, -1)
-        #     logits = logits[self.model_config.mapping.tp_rank][:token_count]
-        # print(f"In LogitsProcessor, final logits.shape: {logits.shape}")
         logits = logits.float()
         return logits
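
After this change LogitsProcessor no longer stores a ModelConfig, and its forward path reduces to a plain lm_head projection followed by a float cast. A condensed, runnable sketch of that shape; the real forward also takes arguments (such as attention metadata) that this hunk does not show, so treat this as an illustration only:

```python
import torch
from torch import nn


class MinimalLogitsProcessor(nn.Module):
    """Condensed sketch of the post-commit LogitsProcessor; not the full class."""

    def __init__(self):
        super().__init__()

    def forward(self, hidden_states: torch.Tensor,
                lm_head: nn.Module) -> torch.Tensor:
        # Project hidden states to vocabulary logits and return them in fp32.
        logits = lm_head(hidden_states)
        return logits.float()


# Example usage with a plain nn.Linear standing in for the LMHead module.
proc = MinimalLogitsProcessor()
print(proc.forward(torch.randn(4, 16), nn.Linear(16, 32)).shape)  # torch.Size([4, 32])
```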

tensorrt_llm/_torch/speculative/mtp.py
2 additions, 13 deletions

@@ -858,12 +858,7 @@ def sample_and_accept_draft_tokens(

         # context
         accepted_tokens[:num_contexts, 0] = target_tokens[:num_contexts]
-        # print(
-        #     f"In sample_and_accept_draft_tokens, accepted_tokens.shape: {accepted_tokens.shape}, num_contexts: {num_contexts}, mapping: {self.model_config.mapping.tp_rank}"
-        # )
-        # print(
-        #     f"In sample_and_accept_draft_tokens, target_tokens.shape: {target_tokens.shape}, num_gens: {num_gens}, mtp_num_modules: {mtp_num_modules}, mapping: {self.model_config.mapping.tp_rank}"
-        # )
+
         # generation
         gen_target_tokens = target_tokens[num_contexts:].reshape(
             num_gens, mtp_num_modules + 1)

@@ -1123,10 +1118,9 @@ def draft_sampler(
                 self.model_config.mapping, 'enable_lm_tp_in_adp', False)):
             # For ADP + LM TP mode, we need to find the global argmax across all TP ranks
             # First, get local argmax and max values
-            # print(f"In draft_sampler, initial logits.shape: {logits.shape}")
             import os
             from tensorrt_llm.mapping import Mapping
-            lm_tp_size = int(os.getenv('LM_TP_SIZE', 8))
+            lm_tp_size = int(os.getenv('LM_TP_SIZE', 2))
             assert self.model_config.mapping.tp_size % lm_tp_size == 0
             lm_pp_size = self.model_config.mapping.pp_size * self.model_config.mapping.tp_size // lm_tp_size
             mapping_lm_tp = Mapping(

@@ -1145,8 +1139,6 @@ def draft_sampler(
             gathered = gathered.view(mapping_lm_tp.tp_size, local_batch_size, -1)
             sliced_gathered = gathered[mapping_lm_tp.tp_rank]
             draft_tokens = self.get_draft_tokens_from_gathered(sliced_gathered)
-            # draft_tokens = torch.argmax(sliced_gathered,
-            #                             dim=-1).type(torch.int32)
         else:
             # Simple argmax if no TP or no model config
             draft_tokens = torch.argmax(logits, dim=-1).type(torch.int32)

@@ -1246,9 +1238,6 @@ def prepare_position_ids_and_last_tokens(position_ids, attn_metadata):
             **inputs)
         # All of the seq_len are 1, use batch_indices_cuda as gather_ids
         gather_ids = spec_metadata.batch_indices_cuda[:batch_size]
-        # print(f"In MTPEagleWorker, hidden_states.shape: {hidden_states.shape}, hidden_states[gather_ids].shape: {hidden_states[gather_ids].shape}")
-        # print(f"In MTPEagleWorker, gather_ids.shape: {gather_ids.shape}, mapping: {self.model_config.mapping.tp_rank}")
-        # print(f"In MTPEagleWorker, spec_metadata.max_num_requests: {spec_metadata.max_num_requests}, mapping: {self.model_config.mapping.tp_rank}")
         hidden_states_gathered = hidden_states[gather_ids]
         token_count = hidden_states_gathered.view(-1,
             hidden_states_gathered.shape[-1]).shape[0]
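
For context, a toy illustration of the gathered-logits handling in draft_sampler under ADP + LM TP: logits gathered across the LM TP group are viewed as [tp_size, local_batch, vocab], each rank keeps its own slice, and draft tokens come from an argmax over the vocabulary dimension (the real code delegates that last step to get_draft_tokens_from_gathered). Shapes and the rank value here are made up for the example.

```python
import torch

lm_tp_size, local_batch_size, vocab = 2, 3, 8
tp_rank = 0  # this rank's position in the LM TP group

# Pretend `gathered` holds logits collected from all LM TP ranks.
gathered = torch.randn(lm_tp_size * local_batch_size, vocab)
gathered = gathered.view(lm_tp_size, local_batch_size, -1)

sliced_gathered = gathered[tp_rank]  # [local_batch, vocab]
draft_tokens = torch.argmax(sliced_gathered, dim=-1).to(torch.int32)
print(draft_tokens.shape)  # torch.Size([3])
```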
