Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ class HunYuanMoEV1Config(PretrainedConfig):
The dropout ratio for the attention probabilities.
num_experts (`int` or `List`, *optional*, defaults to 1):
The number of experts for moe. If it is a list, it will be used as the number of experts for each layer.
norm_topk_prob (`bool`, *optional*, defaults to `True`):
Whether to normalize the topk probabilities.
moe_topk (int or List, *optional*, defaults to 1):
Number of experts selected per token (Top-K routing). List form enables layer-wise customization.
head_dim (`int`, *optional*, defaults to 128):
Expand Down Expand Up @@ -135,7 +133,6 @@ def __init__(
attention_bias=False,
attention_dropout=0.0,
num_experts: Union[int, list] = 1,
norm_topk_prob=True,
moe_topk: Union[int, list] = 1,
head_dim=None,
**kwargs,
Expand All @@ -147,7 +144,6 @@ def __init__(
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.num_experts = num_experts
self.norm_topk_prob = norm_topk_prob
self.moe_topk = moe_topk

self.head_dim = head_dim
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ def __init__(self, config: HunYuanMoEV1Config, layer_idx: Optional[int] = None):
self.layer_idx = layer_idx
self.num_experts = config.num_experts if isinstance(config.num_experts, int) else config.num_experts[layer_idx]
self.top_k = config.moe_topk if isinstance(config.moe_topk, int) else config.moe_topk[layer_idx]
self.norm_topk_prob = config.norm_topk_prob
self.gate = HunYuanMoEV1Gate(config, layer_idx=layer_idx)
# self.wg = nn.Linear(config.hidden_size, config.num_experts, bias=False, dtype=torch.float32)
self.experts = nn.ModuleList(
Expand All @@ -270,8 +269,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:

routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
if self.norm_topk_prob:
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
# we cast back to the input dtype
routing_weights = routing_weights.to(hidden_states.dtype)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ def __init__(self, config: HunYuanMoEV1Config, layer_idx: Optional[int] = None):
self.layer_idx = layer_idx
self.num_experts = config.num_experts if isinstance(config.num_experts, int) else config.num_experts[layer_idx]
self.top_k = config.moe_topk if isinstance(config.moe_topk, int) else config.moe_topk[layer_idx]
self.norm_topk_prob = config.norm_topk_prob
self.gate = HunYuanMoEV1Gate(config, layer_idx=layer_idx)
# self.wg = nn.Linear(config.hidden_size, config.num_experts, bias=False, dtype=torch.float32)
self.experts = nn.ModuleList(
Expand All @@ -155,8 +154,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:

routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
if self.norm_topk_prob:
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
# we cast back to the input dtype
routing_weights = routing_weights.to(hidden_states.dtype)

Expand Down
11 changes: 5 additions & 6 deletions tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ def tearDown(self):
def test_model_generation(self):
# we will complete this when the model file changeover is done
# pass
EXPECTED_ANSWER = "\nRegular exercise offers numerous physical, mental, and emotional benefits. It improves cardiovascular health, strengthens muscles and bones, boosts metabolism, and helps"
prompt = "Write a short summary of the benefits of regular exercise "
EXPECTED_ANSWER = "\nOkay, I need to write a short summary about the benefits of regular exercise. Let me start by recalling what I know. First,"
prompt = "Write a short summary of the benefits of regular exercise"
tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct")
model = AutoModelForCausalLM.from_pretrained("tencent/Hunyuan-A13B-Instruct", device_map="auto")
messages = [
Expand All @@ -125,9 +125,8 @@ def test_model_generation(self):
tokenize=True,
add_generation_prompt=True,
return_tensors="pt",
enable_thinking=False, # Toggle thinking mode (default: True)
)
generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=30, top_k=1)
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
answer = text.split("<answer>")[1]
self.assertEqual(EXPECTED_ANSWER, answer)
text = tokenizer.decode(generated_ids[0])
output = text.split("<think>")[1]
self.assertEqual(EXPECTED_ANSWER, output)