From c8f28ed5d3081a75b0491b03cb4f65858b8d3a4c Mon Sep 17 00:00:00 2001
From: Nikhil Ghosh <nikhil@anyscale.com>
Date: Fri, 7 Nov 2025 14:37:40 -0800
Subject: [PATCH] Fix vLLM Ray Data quickstart example: lower max_model_len to fit GPU memory

Signed-off-by: Nikhil Ghosh <nikhil@anyscale.com>
---
 doc/source/data/doc_code/working-with-llms/basic_llm_example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py
index fd36c0e2aa68..532c60a7db24 100644
--- a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py
+++ b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py
@@ -25,7 +25,7 @@
     engine_kwargs={
         "enable_chunked_prefill": True,
         "max_num_batched_tokens": 4096,  # Reduce if CUDA OOM occurs
-        "max_model_len": 16384,
+        "max_model_len": 4096,  # Constrain to fit test GPU memory
     },
     concurrency=1,
     batch_size=64,