
Commit e612bd6: "Temporary code storage" (1 parent: 891544c)

3 files changed: +188 additions, -8 deletions

fastdeploy/model_executor/layers/attention/append_attn_backend.py

Lines changed: 101 additions & 0 deletions
@@ -155,6 +155,37 @@ def init_attention_metadata(self, forward_meta: ForwardMeta):
         )

         self.attention_metadata: AttentionMetadata = metadata
+
+
+        try:
+            from paddleformers.utils.log import logger
+            is_prefill = int(paddle.max(forward_meta.seq_lens_encoder).item()) > 0
+            if is_prefill:  # only print during the prefill phase
+                logger.info("\n\n" + "="*30)
+                logger.info("GOLDEN METADATA DUMP (FROM OLD VERSION)")
+                logger.info("="*30)
+
+                def log_tensor_as_list(tensor, name):
+                    if tensor is not None:
+                        # use .numpy() to read the values on the CPU
+                        logger.info(f"'{name}': {tensor.numpy().flatten().tolist()},")
+                    else:
+                        logger.info(f"'{name}': None,")
+
+                log_tensor_as_list(forward_meta.encoder_num_blocks_x_cpu, "encoder_num_blocks_x_cpu")
+                log_tensor_as_list(forward_meta.kv_num_blocks_x_cpu, "kv_num_blocks_x_cpu")
+                log_tensor_as_list(forward_meta.decoder_num_blocks_cpu, "decoder_num_blocks_cpu")
+                log_tensor_as_list(forward_meta.max_len_tensor_cpu, "max_len_tensor_cpu")
+                log_tensor_as_list(forward_meta.max_len_kv_cpu, "max_len_kv_cpu")
+
+                # (optional, but helpful) also print encoder_batch_ids and friends if they are small
+                log_tensor_as_list(forward_meta.encoder_batch_ids, "encoder_batch_ids")
+                log_tensor_as_list(forward_meta.encoder_tile_ids_per_batch, "encoder_tile_ids_per_batch")
+
+                logger.info("="*30 + "\n\n")
+        except Exception as e:
+            # ignore any errors raised while printing
+            pass

     def get_attntion_meta(self) -> AttentionMetadata:
         """get_attntion_meta"""
@@ -197,6 +228,76 @@ def forward_mixed(
         """
         forward_mixed
         """
+
+        # ==================== [START] comprehensive debug printing ====================
+        is_capturing_cudagraph = forward_meta.step_use_cudagraph
+
+        if not is_capturing_cudagraph and layer.layer_id == 7:  # only print the GQA layer we care about
+            try:
+                from fastdeploy.model_executor.models.minimax_m1 import print_tensor_stats
+                from paddleformers.utils.log import logger
+            except ImportError:
+                import logging
+                logger = logging.getLogger(__name__)
+                def print_tensor_stats(tensor, name):
+                    if tensor is not None:
+                        logger.info(f"--- [FD DEBUG] {name} Shape: {tensor.shape}, DType: {tensor.dtype} ---")
+                    else:
+                        logger.info(f"--- [FD DEBUG] {name} is None ---")
+
+            logger.info(f"\n{'='*25}\n[FD DEBUG] DETAILED DUMP for append_attention @ Layer {layer.layer_id}\n{'='*25}")
+
+            # 1. Print the attention module's config parameters
+            logger.info(f"--- [FD DEBUG] Attention Config ---")
+            logger.info(f"  - use_neox_rotary_style: {layer.use_neox_rotary_style}")
+            logger.info(f"  - sliding_window: {layer.sliding_window}")
+            logger.info(f"  - causal: {self.causal}")
+            logger.info(f"  - speculative_method is not None: {self.speculative_method is not None}")
+            logger.info(f"  - head_dim: {self.head_dim}, num_heads: {self.num_heads}, num_kv_heads: {self.kv_num_heads}")
+            logger.info(f"--------------------------\n")
+
+            # 2. Print the core input tensor (qkv)
+            print_tensor_stats(qkv, f"FD_L{layer.layer_id}_INPUT:qkv_combined")
+
+            # 3. Print KV-cache related tensors
+            cache_k = forward_meta.caches[2 * layer.layer_id]
+            cache_v = forward_meta.caches[2 * layer.layer_id + 1]
+            print_tensor_stats(cache_k, f"FD_L{layer.layer_id}_INPUT:cache_k")
+            print_tensor_stats(cache_v, f"FD_L{layer.layer_id}_INPUT:cache_v")
+            print_tensor_stats(forward_meta.block_tables, f"FD_L{layer.layer_id}_META:block_tables")
+
+            # 4. Print sequence-length and position information
+            print_tensor_stats(forward_meta.seq_lens_encoder, f"FD_L{layer.layer_id}_META:seq_lens_encoder")
+            print_tensor_stats(forward_meta.seq_lens_decoder, f"FD_L{layer.layer_id}_META:seq_lens_decoder")
+            print_tensor_stats(forward_meta.seq_lens_this_time, f"FD_L{layer.layer_id}_META:seq_lens_this_time")
+            print_tensor_stats(forward_meta.batch_id_per_token, f"FD_L{layer.layer_id}_META:batch_id_per_token")
+            print_tensor_stats(forward_meta.cu_seqlens_q, f"FD_L{layer.layer_id}_META:cu_seqlens_q")
+
+            # 5. Print the RoPE lookup table
+            metadata = self.attention_metadata
+            print_tensor_stats(metadata.rotary_embs, f"FD_L{layer.layer_id}_INPUT:rotary_embs_table")
+
+            # 6. Print the tile/block partition info used inside the kernel (very important!)
+            print_tensor_stats(forward_meta.encoder_batch_ids, f"FD_L{layer.layer_id}_META:encoder_batch_ids")
+            print_tensor_stats(forward_meta.encoder_tile_ids_per_batch, f"FD_L{layer.layer_id}_META:encoder_tile_ids_per_batch")
+            print_tensor_stats(forward_meta.encoder_num_blocks_x_cpu, f"FD_L{layer.layer_id}_META:encoder_num_blocks_x_cpu")
+
+            print_tensor_stats(forward_meta.kv_batch_ids, f"FD_L{layer.layer_id}_META:kv_batch_ids")
+            print_tensor_stats(forward_meta.kv_tile_ids_per_batch, f"FD_L{layer.layer_id}_META:kv_tile_ids_per_batch")
+            print_tensor_stats(forward_meta.kv_num_blocks_x_cpu, f"FD_L{layer.layer_id}_META:kv_num_blocks_x_cpu")
+
+            print_tensor_stats(forward_meta.decoder_batch_ids, f"FD_L{layer.layer_id}_META:decoder_batch_ids")
+            print_tensor_stats(forward_meta.decoder_tile_ids_per_batch, f"FD_L{layer.layer_id}_META:decoder_tile_ids_per_batch")
+            print_tensor_stats(forward_meta.decoder_num_blocks_cpu, f"FD_L{layer.layer_id}_META:decoder_num_blocks_cpu")
+
+            # 7. Print max_len_tensor_cpu (very important!)
+            print_tensor_stats(forward_meta.max_len_tensor_cpu, f"FD_L{layer.layer_id}_META:max_len_tensor_cpu")
+
+            logger.info(f"\n{'='*25}\n[FD DEBUG] END OF DUMP for append_attention @ Layer {layer.layer_id}\n{'='*25}\n")
+
+        # ==================== [END] comprehensive debug printing ====================
+
+
         metadata = self.attention_metadata

         sliding_window = layer.sliding_window
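The dump above is hard-wired to layer_id == 7 and has to be edited out again after debugging. One alternative, sketched here under the assumption that an environment variable is acceptable (FD_ATTN_DEBUG_LAYER is a made-up name, not an existing FastDeploy setting), is to gate the dump at runtime:

import os

def should_dump(layer_id: int, step_use_cudagraph: bool) -> bool:
    """Dump only when a layer is explicitly requested and CUDA graph capture is not active."""
    target = os.getenv("FD_ATTN_DEBUG_LAYER")  # e.g. export FD_ATTN_DEBUG_LAYER=7
    return target is not None and not step_use_cudagraph and layer_id == int(target)

forward_mixed would then call should_dump(layer.layer_id, forward_meta.step_use_cudagraph) instead of the literal comparison, so the dump can be switched off without touching the code.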

fastdeploy/model_executor/layers/rotary_embedding.py

Lines changed: 86 additions & 7 deletions
@@ -28,6 +28,21 @@

 from .utils import CpuGuard

+from paddleformers.utils.log import logger  # make sure the logger is imported
+
+# import your print helper
+try:
+    from fastdeploy.model_executor.models.minimax_m1 import print_tensor_stats
+except ImportError:
+    # if it cannot be imported, define a fallback so the program does not crash
+    import pprint
+    def print_tensor_stats(tensor, name):
+        logger.info(f"--- [FD DEBUG] {name} --- (print_tensor_stats not found, simple log)")
+        if tensor is not None:
+            logger.info(f"Shape: {tensor.shape}, DType: {tensor.dtype}")
+        else:
+            logger.info("Tensor is None")
+

 class ErnieRotaryEmbedding:
     def __init__(self, rotary_dim, base, partial_rotary_factor):
@@ -79,29 +94,82 @@ def __call__(self, position_ids):
         return rot_emb


+# class GlmRotaryEmbedding:
+#     def __init__(self, rotary_dim, base, partial_rotary_factor):
+#         """
+#         Pre-calculate rotary position embedding for position_ids.
+#         """
+#         self.rotary_dim = rotary_dim
+#         self.base = base
+#         if partial_rotary_factor < 1.0:
+#             self.rotary_dim = int(self.rotary_dim * partial_rotary_factor)
+
+#     def __call__(self, position_ids):
+#         bsz, max_seq_len = position_ids.shape[:2]
+#         inv_freq = self.base ** (-paddle.arange(0, self.rotary_dim, 2, dtype="float32") / self.rotary_dim)
+#         freqs = paddle.einsum("ij,k->ijk", position_ids.cast("float32"), inv_freq)
+#         # shape: [B, S, D/2]
+#         rot_emb = paddle.zeros((2, bsz, max_seq_len, 1, self.rotary_dim // 2), dtype="float32")
+#         emb = paddle.stack([freqs], axis=-1).reshape((bsz, max_seq_len, self.rotary_dim // 2))
+#         # shape: [B, S, 1, D]
+#         emb = paddle.unsqueeze(emb, 2)
+#         rot_emb[0] = paddle.cos(emb)
+#         rot_emb[1] = paddle.sin(emb)
+#         return rot_emb
+
+
 class GlmRotaryEmbedding:
     def __init__(self, rotary_dim, base, partial_rotary_factor):
         """
         Pre-calculate rotary position embedding for position_ids.
         """
-        self.rotary_dim = rotary_dim
+        # --- verbose logging ---
+        logger.info(">>>> [GlmRotaryEmbedding.__init__] <<<<")
+        logger.info(f"  - Received rotary_dim (as head_dim): {rotary_dim}")
+        logger.info(f"  - Received partial_rotary_factor: {partial_rotary_factor}")
+
         self.base = base
+
+        # core computation
         if partial_rotary_factor < 1.0:
-            self.rotary_dim = int(self.rotary_dim * partial_rotary_factor)
+            self.rotary_dim = int(rotary_dim * partial_rotary_factor)
+        else:
+            self.rotary_dim = rotary_dim
+
+        logger.info(f"  - Calculated final self.rotary_dim: {self.rotary_dim}")
+        # --- end of logging ---

     def __call__(self, position_ids):
+        # --- verbose logging ---
+        logger.info(">>>> [GlmRotaryEmbedding.__call__] <<<<")
+        logger.info(f"  - Using self.rotary_dim: {self.rotary_dim}")
+        logger.info(f"  - Using self.base: {self.base}")
+
         bsz, max_seq_len = position_ids.shape[:2]
-        inv_freq = self.base ** (-paddle.arange(0, self.rotary_dim, 2, dtype="float32") / self.rotary_dim)
+
+        # check the upper bound of the arange
+        arange_upper_bound = self.rotary_dim
+        logger.info(f"  - paddle.arange upper bound is: {arange_upper_bound}")
+
+        # key computation step
+        inv_freq_dims = paddle.arange(0, arange_upper_bound, 2, dtype="float32")
+        logger.info(f"  - Shape of inv_freq_dims (from arange): {inv_freq_dims.shape}")  # this line tells us the final dimension
+
+        inv_freq = self.base ** (-inv_freq_dims / self.rotary_dim)
         freqs = paddle.einsum("ij,k->ijk", position_ids.cast("float32"), inv_freq)
-        # shape: [B, S, D/2]
+
         rot_emb = paddle.zeros((2, bsz, max_seq_len, 1, self.rotary_dim // 2), dtype="float32")
         emb = paddle.stack([freqs], axis=-1).reshape((bsz, max_seq_len, self.rotary_dim // 2))
-        # shape: [B, S, 1, D]
+
         emb = paddle.unsqueeze(emb, 2)
         rot_emb[0] = paddle.cos(emb)
         rot_emb[1] = paddle.sin(emb)
-        return rot_emb
+
+        logger.info(f"  - Final returned rot_emb shape: {rot_emb.shape}")
+        logger.info(">>>> [GlmRotaryEmbedding.__call__ END] <<<<")

+        return rot_emb

 class QwenRotaryEmbedding:
     def __init__(self, rotary_dim, base, partial_rotary_factor):
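As a sanity check on the shapes the new __init__/__call__ logging reports, the same partial-rotary math can be reproduced outside Paddle. A minimal NumPy sketch with illustrative inputs (head_dim=128, partial_rotary_factor=0.5, base=10000 are example values, not taken from this commit):

import numpy as np

head_dim, partial_rotary_factor, base = 128, 0.5, 10000.0    # example inputs only
rotary_dim = int(head_dim * partial_rotary_factor) if partial_rotary_factor < 1.0 else head_dim

position_ids = np.arange(8, dtype="float32")[None, :]                             # [B=1, S=8]
inv_freq = base ** (-np.arange(0, rotary_dim, 2, dtype="float32") / rotary_dim)   # [rotary_dim // 2]
freqs = np.einsum("ij,k->ijk", position_ids, inv_freq)                            # [B, S, rotary_dim // 2]
emb = freqs[:, :, None, :]                                                         # [B, S, 1, rotary_dim // 2]

rot_emb = np.stack([np.cos(emb), np.sin(emb)])                                     # [2, B, S, 1, rotary_dim // 2]
print(rotary_dim, rot_emb.shape)                                                   # 64 (2, 1, 8, 1, 32)

This mirrors what the logged values should show: with a partial rotary factor of 0.5, the arange upper bound is half the head dim and the last axis of rot_emb is rotary_dim // 2.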
@@ -131,7 +199,6 @@ def __call__(self, position_ids):

         return rot_emb

-
 def yarn_get_mscale(scale=1, mscale=1):
     """ """
     if scale <= 1:
@@ -332,11 +399,14 @@ def get_rope_impl(
     """
    The real implementation of get_rope
     """
+    print_tensor_stats(position_ids[:, :16], "ROPE_IMPL_INPUT:position_ids[:, :16]")

     architecture = model_config.architectures[0]
+    # if architecture.startswith("Qwen") or architecture.startswith("MiniMaxM1"):
     if architecture.startswith("Qwen"):
         rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
         rotary_emb = rotary_emb_layer(position_ids)
+    # elif architecture.startswith("Glm"):
     elif architecture.startswith("Glm") or architecture.startswith("MiniMaxM1"):
         rotary_emb_layer = GlmRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
         rotary_emb = rotary_emb_layer(position_ids)

@@ -354,6 +424,15 @@
     else:
         rotary_emb_layer = ErnieRotaryEmbedding(rotary_dim, base, partial_rotary_factor)
         rotary_emb = rotary_emb_layer(position_ids)
+
+    # if rotary_emb.ndim == 5:
+    #     logger.info(f">>>> [ROPE RESHAPE] Squeezing rotary_emb from {rotary_emb.shape} <<<<")
+    #     rotary_emb = paddle.squeeze(rotary_emb, axis=[1, 3])
+    #     logger.info(f">>>> [ROPE RESHAPE] New shape is {rotary_emb.shape} <<<<")
+
+    # ... (earlier log prints)
+    print_tensor_stats(rotary_emb[0, :16], "ROPE_IMPL_OUTPUT:cos_emb[:16]")
+    print_tensor_stats(rotary_emb[1, :16], "ROPE_IMPL_OUTPUT:sin_emb[:16]")
     return rotary_emb

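The commented-out block above would squeeze a 5-D rot_emb. For the [2, B, S, 1, rotary_dim // 2] layout produced by GlmRotaryEmbedding, removing axes 1 and 3 only succeeds when B == 1 and yields [2, S, rotary_dim // 2]. A quick, purely illustrative NumPy check of that shape change:

import numpy as np

rot_emb = np.zeros((2, 1, 8, 1, 32), dtype="float32")   # [2, B=1, S=8, 1, rotary_dim//2=32]
print(np.squeeze(rot_emb, axis=(1, 3)).shape)            # -> (2, 8, 32)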

fastdeploy/model_executor/models/minimax_m1.py

Lines changed: 1 addition & 1 deletion
@@ -492,7 +492,7 @@ def forward(self, forward_meta: ForwardMeta, hidden_states: paddle.Tensor, resid
         print_tensor_stats(k_before_rope, f"FD_L{layer_id}:1d_K_BeforeRoPE")
         print_tensor_stats(v_tensor, f"FD_L{layer_id}:1e_V_Tensor")
         logger.info(f"--- [FD DEBUG] ForwardMeta DUMP FOR LAYER {layer_id} ---")
-        # 1. RoPE cache (the most critical one)
+        # 1. RoPE cache
         # we need to know its shape to confirm it was generated correctly
         if forward_meta.rotary_embs is not None:
             logger.info("--- [FD DEBUG] forward_meta.rotary_embs ---")
