
Commit 891544c

Executable code
1 parent 92c71d4 commit 891544c


4 files changed: +79 additions, -33 deletions

fastdeploy/config.py

Lines changed: 17 additions & 6 deletions
@@ -219,6 +219,23 @@ def __init__(
 
         if not hasattr(self, "head_dim"):
             self.head_dim = self.hidden_size // self.num_attention_heads
+
+        rotary_dim = getattr(self, "rotary_dim", None)
+        head_dim = getattr(self, "head_dim", None)
+
+        if (rotary_dim is not None and
+                head_dim is not None and
+                rotary_dim < head_dim):
+
+            # The calculation and overriding are only performed when partial_rotary_factor is still the default value of 1.0.
+            if getattr(self, "partial_rotary_factor", 1.0) == 1.0:
+                self.partial_rotary_factor = rotary_dim / head_dim
+                logger.info(f"Partial rotation detected via 'rotary_dim'. "
+                            f"Calculated and set 'partial_rotary_factor' to: {self.partial_rotary_factor:.4f}")
+
+        current_partial_factor = getattr(self, "partial_rotary_factor", 1.0)
+        if current_partial_factor < 1.0 and head_dim is not None:
+            self.rotary_dim = int(head_dim * current_partial_factor)
 
         if hasattr(self, "vision_config"):
             self.vision_config = PretrainedConfig.from_dict(self.vision_config)
@@ -227,12 +244,6 @@ def __init__(
             self.think_end_id = args.get("think_end_id", -1)
             self.im_patch_id = args.get("image_patch_id", -1)
             self.line_break_id = args.get("line_break_id", -1)
-
-        if (hasattr(self, "rotary_dim") and
-                hasattr(self, "head_dim") and
-                self.rotary_dim < self.head_dim):
-            self.partial_rotary_factor = self.rotary_dim / self.head_dim
-            logger.info(f"Partial rotation detected. Calculated partial_rotary_factor: {self.partial_rotary_factor}")
 
         self._post_init()
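For reference, the derivation this change adds can be reproduced standalone. The values below are illustrative (64/128 matches the MiniMax-M1 expectation noted in the debug comments further down), not read from any real config:

    # Standalone sketch of the new ModelConfig.__init__ logic, with assumed example values.
    head_dim = 128   # assumed example
    rotary_dim = 64  # assumed example; rotary_dim < head_dim indicates partial rotation

    partial_rotary_factor = 1.0
    if rotary_dim is not None and head_dim is not None and rotary_dim < head_dim:
        if partial_rotary_factor == 1.0:
            partial_rotary_factor = rotary_dim / head_dim   # 64 / 128 = 0.5
    if partial_rotary_factor < 1.0 and head_dim is not None:
        rotary_dim = int(head_dim * partial_rotary_factor)  # stays 64, now consistent with the factor
    print(partial_rotary_factor, rotary_dim)                # 0.5 64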

fastdeploy/demo/offline_demo.py

Lines changed: 2 additions & 2 deletions
@@ -17,11 +17,11 @@
 from fastdeploy.engine.sampling_params import SamplingParams
 from fastdeploy.entrypoints.llm import LLM
 
-model_name_or_path = "./models/llama-7b"
+model_name_or_path = "/home/aistudio/config_folder"
 
 # Hyperparameter settings
 sampling_params = SamplingParams(temperature=0.1, max_tokens=30)
-llm = LLM(model=model_name_or_path, tensor_parallel_size=1)
+llm = LLM(model=model_name_or_path, tensor_parallel_size=4, load_choices="default_v1")
 output = llm.generate(prompts="who are you?", use_tqdm=True, sampling_params=sampling_params)
 
 print(output)
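The demo now points at a local config folder and runs with four-way tensor parallelism. The contents of /home/aistudio/config_folder are not part of this commit; purely as an illustration, a partial-rotary model config in that folder might carry fields like these (hypothetical values):

    # Hypothetical config fields -- not taken from this commit.
    example_config = {
        "num_attention_heads": 64,  # assumed
        "head_dim": 128,            # assumed
        "rotary_dim": 64,           # rotary_dim < head_dim triggers the new branch in config.py
    }
    # ModelConfig.__init__ would then set partial_rotary_factor = 64 / 128 = 0.5
    # and keep rotary_dim = int(128 * 0.5) = 64.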

fastdeploy/model_executor/models/minimax_m1.py

Lines changed: 31 additions & 0 deletions
@@ -477,15 +477,46 @@ def forward(self, forward_meta: ForwardMeta, hidden_states: paddle.Tensor, resid
         # GQA
         if self.attn_type == 1:
             qkv_out = self.qkv_proj(layernorm_output)
+            # print_tensor_stats(qkv_out, f"FD_L{layer_id}:1b_After_QKV_Proj_Combined")
+            logger.info(f"--- [FD DEBUG] PRE-ATTENTION DUMP FOR LAYER {layer_id} ---")
+            print_tensor_stats(hidden_states, f"FD_L{layer_id}:0_HiddenStates_Input")
+            print_tensor_stats(layernorm_output, f"FD_L{layer_id}:1a_After_InputLayernorm")
             print_tensor_stats(qkv_out, f"FD_L{layer_id}:1b_After_QKV_Proj_Combined")
 
+
             q_size_tp = self.self_attn.num_heads * self.self_attn.head_dim
             k_size_tp = self.self_attn.kv_num_heads * self.self_attn.head_dim
 
             q_before_rope, k_before_rope, v_tensor = qkv_out.split([q_size_tp, k_size_tp, k_size_tp], axis=-1)
             print_tensor_stats(q_before_rope, f"FD_L{layer_id}:1c_Q_BeforeRoPE")
             print_tensor_stats(k_before_rope, f"FD_L{layer_id}:1d_K_BeforeRoPE")
             print_tensor_stats(v_tensor, f"FD_L{layer_id}:1e_V_Tensor")
+            logger.info(f"--- [FD DEBUG] ForwardMeta DUMP FOR LAYER {layer_id} ---")
+            # 1. RoPE cache (the most critical part)
+            # We need its shape to confirm it was generated correctly
+            if forward_meta.rotary_embs is not None:
+                logger.info("--- [FD DEBUG] forward_meta.rotary_embs ---")
+                print_tensor_stats(forward_meta.rotary_embs, f"FD_L{layer_id}:meta_rotary_embs")
+                # Expected shape: [2, bsz, max_seq_len, 1, rotary_dim] or [2, bsz, max_seq_len, 1, rotary_dim/2]
+                # For MiniMax-M1 (NEOX style), it should be [2, 1, max_len, 1, 64]
+            else:
+                logger.info("--- [FD DEBUG] forward_meta.rotary_embs is None ---")
+
+            # 2. Sequence length information
+            print_tensor_stats(forward_meta.seq_lens_encoder, f"FD_L{layer_id}:meta_seq_lens_encoder")
+            print_tensor_stats(forward_meta.seq_lens_decoder, f"FD_L{layer_id}:meta_seq_lens_decoder")
+            print_tensor_stats(forward_meta.seq_lens_this_time, f"FD_L{layer_id}:meta_seq_lens_this_time")
+
+            # 3. Padding and index information
+            print_tensor_stats(forward_meta.ids_remove_padding, f"FD_L{layer_id}:meta_ids_remove_padding")
+            print_tensor_stats(forward_meta.batch_id_per_token, f"FD_L{layer_id}:meta_batch_id_per_token")
+            print_tensor_stats(forward_meta.cu_seqlens_q, f"FD_L{layer_id}:meta_cu_seqlens_q")
+            print_tensor_stats(forward_meta.cu_seqlens_k, f"FD_L{layer_id}:meta_cu_seqlens_k")
+
+            # 4. KV cache related information
+            print_tensor_stats(forward_meta.block_tables, f"FD_L{layer_id}:meta_block_tables")
+            logger.info(f"--- [FD DEBUG] END OF DUMP FOR LAYER {layer_id} ---\n")
+            # --- End of debug logging ---
 
 
             attn_output = self.self_attn(qkv=qkv_out, forward_meta=forward_meta)
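print_tensor_stats is called throughout this dump, but its definition is not shown in the commit. A self-contained sketch of what such a helper might look like (an assumption, not FastDeploy code):

    import logging

    import paddle

    logger = logging.getLogger(__name__)

    def print_tensor_stats(t, tag: str) -> None:
        # Hypothetical helper: log shape, dtype and basic statistics of a tensor.
        if t is None:
            logger.info(f"{tag}: None")
            return
        x = paddle.cast(t, "float32")
        logger.info(
            f"{tag}: shape={t.shape}, dtype={t.dtype}, "
            f"mean={float(x.mean()):.6f}, min={float(x.min()):.6f}, max={float(x.max()):.6f}"
        )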

fastdeploy/worker/gpu_model_runner.py

Lines changed: 29 additions & 25 deletions
@@ -83,7 +83,7 @@
 from fastdeploy import envs
 from fastdeploy.engine.pooling_params import PoolingParams
 from fastdeploy.engine.tasks import PoolingTask
-from fastdeploy.input.ernie4_5_vl_processor import DataProcessor
+# from fastdeploy.input.ernie4_5_vl_processor import DataProcessor
 from fastdeploy.inter_communicator import IPCSignal, ZmqIpcClient
 from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.layers.pool.metadata import PoolingMetadata
@@ -117,8 +117,8 @@ def __init__(
 
         # VL model config:
         if self.enable_mm:
-            if "ernie" in self.fd_config.model_config.model_type:
-                self._init_image_preprocess()
+            # if "ernie" in self.fd_config.model_config.model_type:
+            #     self._init_image_preprocess()
 
             self.amp_black = [
                 "reduce_sum",
@@ -1119,6 +1119,11 @@ def _init_share_inputs(self, max_num_seqs: int):
 
         # Initialize rotary position embedding
         if not self.enable_mm:
+
+            logger.info(f"Final rotary_dim from config: {self.model_config.rotary_dim}")
+            logger.info(f"Original head_dim from config: {self.model_config.head_dim}")
+            logger.info(f"Calculated partial_rotary_factor from config: {self.model_config.partial_rotary_factor}")
+
             self.share_inputs["rope_emb"] = get_rope(
                 rotary_dim=self.model_config.head_dim,
                 position_ids=paddle.arange(self.model_config.max_model_len).reshape((1, -1)),
@@ -1128,7 +1133,6 @@ def _init_share_inputs(self, max_num_seqs: int):
             )
 
 
-
         # Set block tables
         pre_max_block_num = (
             self.model_config.max_model_len + self.cache_config.block_size - 1
@@ -2423,27 +2427,27 @@ def padding_cudagraph_inputs(self) -> None:
         self.real_token_num = self.forward_meta.ids_remove_padding.shape[0]
         return
 
-    def _init_image_preprocess(self) -> None:
-        processor = DataProcessor(
-            tokenizer_name=self.model_config.model,
-            image_preprocessor_name=str(self.model_config.model),
-        )
-        processor.eval()
-        image_preprocess = processor.image_preprocessor
-        image_preprocess.image_mean_tensor = paddle.to_tensor(image_preprocess.image_mean, dtype="float32").reshape(
-            [1, 3, 1, 1]
-        )
-        image_preprocess.image_std_tensor = paddle.to_tensor(image_preprocess.image_std, dtype="float32").reshape(
-            [1, 3, 1, 1]
-        )
-        image_preprocess.rescale_factor = paddle.to_tensor(image_preprocess.rescale_factor, dtype="float32")
-        image_preprocess.image_mean_tensor = image_preprocess.image_mean_tensor.squeeze([-2, -1]).repeat_interleave(
-            self.model_config.vision_config.patch_size**2 * 1, -1
-        )
-        image_preprocess.image_std_tensor = image_preprocess.image_std_tensor.squeeze([-2, -1]).repeat_interleave(
-            self.model_config.vision_config.patch_size**2 * 1, -1
-        )
-        self.image_preprocess = image_preprocess
+    # def _init_image_preprocess(self) -> None:
+    #     processor = DataProcessor(
+    #         tokenizer_name=self.model_config.model,
+    #         image_preprocessor_name=str(self.model_config.model),
+    #     )
+    #     processor.eval()
+    #     image_preprocess = processor.image_preprocessor
+    #     image_preprocess.image_mean_tensor = paddle.to_tensor(image_preprocess.image_mean, dtype="float32").reshape(
+    #         [1, 3, 1, 1]
+    #     )
+    #     image_preprocess.image_std_tensor = paddle.to_tensor(image_preprocess.image_std, dtype="float32").reshape(
+    #         [1, 3, 1, 1]
+    #     )
+    #     image_preprocess.rescale_factor = paddle.to_tensor(image_preprocess.rescale_factor, dtype="float32")
+    #     image_preprocess.image_mean_tensor = image_preprocess.image_mean_tensor.squeeze([-2, -1]).repeat_interleave(
+    #         self.model_config.vision_config.patch_size**2 * 1, -1
+    #     )
+    #     image_preprocess.image_std_tensor = image_preprocess.image_std_tensor.squeeze([-2, -1]).repeat_interleave(
+    #         self.model_config.vision_config.patch_size**2 * 1, -1
+    #     )
+    #     self.image_preprocess = image_preprocess
 
     def _preprocess_mm_task(self, one: dict) -> None:
         """process batch"""
