diff --git a/src/transformers/models/llama4/configuration_llama4.py b/src/transformers/models/llama4/configuration_llama4.py
index a710903da1d0..79cfd063f4d4 100644
--- a/src/transformers/models/llama4/configuration_llama4.py
+++ b/src/transformers/models/llama4/configuration_llama4.py
@@ -165,7 +165,7 @@ class Llama4TextConfig(PreTrainedConfig):
     rope_parameters: RopeParameters | dict | None = None
     no_rope_layers: list[int] | None = None
     no_rope_layer_interval: int = 4
-    attention_chunk_size: int = 8192
+    attention_chunk_size: int | None = 8192
     layer_types: list[str] | None = None
     attn_temperature_tuning: bool = True
     floor_scale: int = 8192