Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/transformers/models/afmoe/configuration_afmoe.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class AfmoeConfig(PreTrainedConfig):
output_router_logits: bool = False
global_attn_every_n_layers: int | None = 4
sliding_window: int | None = 1024
- layer_types: list | None = None
+ layer_types: list[str] | None = None
attention_dropout: float | int | None = 0.0
mup_enabled: bool | None = False
eos_token_id: int | list[int] | None = None
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/llama4/configuration_llama4.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class Llama4TextConfig(PreTrainedConfig):
no_rope_layers: list[int] | None = None
no_rope_layer_interval: int = 4
attention_chunk_size: int = 8192
- layer_types: list[int] | None = None
+ layer_types: list[str] | None = None
attn_temperature_tuning: bool = True
floor_scale: int = 8192
attn_scale: float = 0.1
Expand Down
Loading