Avarok-Cybersecurity · AzeezIsh · May 11, 2026 · May 10, 2026
diff --git a/recipes/qwen3.6/qwen3.6-35b-a3b-nvfp4-atlas.yaml b/recipes/qwen3.6/qwen3.6-35b-a3b-nvfp4-atlas.yaml
@@ -0,0 +1,29 @@
+recipe_version: "2"  # quoted: a string, not an integer
+model: "RedHatAI/Qwen3.6-35B-A3B-NVFP4"  # org/name model reference
+runtime: "atlas"
+container: "avarok/atlas-gb10:latest"  # NOTE(review): floating tag; consider pinning
+max_nodes: 1  # the description below states a single GB10
+
+metadata:
+  description: |  # literal block scalar: line breaks below are preserved
+    Qwen3.6-35B-A3B (RedHatAI NVFP4) with MTP K=2 on the Atlas runtime.
+    Mirrors the qwen3.5-35b-a3b-nvfp4 recipe but uses the qwen3_5_moe
+    architecture from Qwen3.6, served on a single GB10. NVFP4 weights +
+    NVFP4 KV cache + NVFP4 MTP-quantized draft head.
+  maintainer: "avarok"
+  category: "agent"
+  model_params: "35B"  # string with a unit suffix, not a number
+  model_dtype: "nvfp4"
+  quantization: "nvfp4"
+  kv_dtype: "nvfp4"  # same value as defaults.kv_cache_dtype
+
+defaults:
+  port: 8888
+  host: "0.0.0.0"  # quoted so the dotted value can only load as a string
+  max_model_len: 131072  # 128 * 1024
+  kv_cache_dtype: "nvfp4"  # same value as metadata.kv_dtype
+  gpu_memory_utilization: 0.88
+  scheduling_policy: "slai"
+  speculative: true  # NOTE(review): description says MTP K=2; no draft-count key here, confirm
+  mtp_quantization: "nvfp4"
+  enable_prefix_caching: true