From d8601d796371108c19d894636d4b792a6db2bafa Mon Sep 17 00:00:00 2001
From: Christopher Albert <albert@tugraz.at>
Date: Thu, 26 Mar 2026 00:57:30 +0100
Subject: [PATCH] fix: bump mlx-lm minimum to 0.31.0 for hybrid model batching

ArraysCache gained native batching support (extract, merge, filter,
prepare) in mlx-lm 0.31.0. With older versions, continuous batching
crashes on hybrid models such as Qwen3.5, which mix KVCache and
ArraysCache layers, failing with:
"ArraysCache.__init__() missing 1 required positional argument: 'size'"

Fixes computor-org/vllm-mlx#11
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index fa92e960..88e878c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
 
 dependencies = [
     "mlx>=0.29.0",
-    "mlx-lm>=0.30.5",  # GLM-4 models require 0.30.5+ (new attention layers)
+    "mlx-lm>=0.31.0",  # 0.31+ required for ArraysCache native batching (hybrid models)
     "mlx-vlm>=0.1.0",  # VLM support
     "transformers>=5.0.0",  # mlx-lm 0.30.5+ requires transformers 5.0 (rc3 bug fixed in stable)
     "tokenizers>=0.19.0",