diff --git a/vllm/attention/backends/rocm_aiter_mla.py b/vllm/attention/backends/rocm_aiter_mla.py index b048220020f1..c974f2a15a0e 100644 --- a/vllm/attention/backends/rocm_aiter_mla.py +++ b/vllm/attention/backends/rocm_aiter_mla.py @@ -132,8 +132,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): def __init__(self, input_builder: "ModelInputForGPUBuilder"): super().__init__(input_builder) - assert self.runner.model_config.max_model_len == 32768,\ - "AITER MLA requires max model len to be set to 32768" assert self.block_size == 1, "AITER MLA requires only block size 1." def prepare(self): diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py index 31980e94a037..d1e823bbe396 100644 --- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py +++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py @@ -66,9 +66,6 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): def __init__(self, runner, kv_cache_spec: AttentionSpec, block_table: BlockTable): super().__init__(runner, kv_cache_spec, block_table) - max_model_len = self.runner.model_config.max_model_len - assert max_model_len == 32768,\ - "AITER MLA requires max_model_len=32768" assert self.kv_cache_spec.block_size == 1, "AITER MLA" \ "only supports block size 1."