diff --git a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py index fd36c0e2aa68..532c60a7db24 100644 --- a/doc/source/data/doc_code/working-with-llms/basic_llm_example.py +++ b/doc/source/data/doc_code/working-with-llms/basic_llm_example.py @@ -25,7 +25,7 @@ engine_kwargs={ "enable_chunked_prefill": True, "max_num_batched_tokens": 4096, # Reduce if CUDA OOM occurs - "max_model_len": 16384, + "max_model_len": 4096, # Constrain to fit test GPU memory }, concurrency=1, batch_size=64,