From c8f28ed5d3081a75b0491b03cb4f65858b8d3a4c Mon Sep 17 00:00:00 2001 From: Nikhil Ghosh Date: Fri, 7 Nov 2025 14:37:40 -0800 Subject: [PATCH] fix vllm ray data quickstart example - gpu memory constraint Signed-off-by: Nikhil Ghosh --- doc/source/data/doc_code/working-with-llms/basic_llm_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py index fd36c0e2aa68..532c60a7db24 100644 --- a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py +++ b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py @@ -25,7 +25,7 @@ engine_kwargs={ "enable_chunked_prefill": True, "max_num_batched_tokens": 4096, # Reduce if CUDA OOM occurs - "max_model_len": 16384, + "max_model_len": 4096, # Constrain to fit test GPU memory }, concurrency=1, batch_size=64,