From 9484cc0962f2bd5922a124e82d96ce4bb1042eba Mon Sep 17 00:00:00 2001
From: Changyong Um
Date: Tue, 12 Nov 2024 05:41:56 +0900
Subject: [PATCH] community[docs]: modify parameter for the LoRA adapter on the vllm page (#27930)

**Description:**
This PR modifies the documentation for configuring the `VLLM` class with a LoRA adapter. The updates aim to give users clear instructions on how to set up a LoRA adapter when using vLLM.

- before

```python
VLLM(..., enable_lora=True)
```

- after

```python
VLLM(
    ...,
    vllm_kwargs={
        "enable_lora": True,
    },
)
```

This change clarifies that users should enable the LoRA adapter through `vllm_kwargs` rather than as a top-level constructor argument.

Co-authored-by: Um Changyong
---
 docs/docs/integrations/llms/vllm.ipynb | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index 1e1baff963161..e68ca9525ad73 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -266,8 +266,18 @@
     "from langchain_community.llms import VLLM\n",
     "from vllm.lora.request import LoRARequest\n",
     "\n",
-    "llm = VLLM(model=\"meta-llama/Llama-2-7b-hf\", enable_lora=True)\n",
-    "\n",
+    "llm = VLLM(\n",
+    "    model=\"meta-llama/Llama-3.2-3B-Instruct\",\n",
+    "    max_new_tokens=300,\n",
+    "    top_k=1,\n",
+    "    top_p=0.90,\n",
+    "    temperature=0.1,\n",
+    "    vllm_kwargs={\n",
+    "        \"gpu_memory_utilization\": 0.5,\n",
+    "        \"enable_lora\": True,\n",
+    "        \"max_model_len\": 350,\n",
+    "    },\n",
+    ")\n",
     "LoRA_ADAPTER_PATH = \"path/to/adapter\"\n",
     "lora_adapter = LoRARequest(\"lora_adapter\", 1, LoRA_ADAPTER_PATH)\n",
     "\n",
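
For readers following the patched notebook cell, the sketch below shows how the adapter configured above could then be applied at generation time. This is a minimal, hypothetical example and not part of the patch: the model name, memory settings, adapter path, and prompt are illustrative, and it assumes the `VLLM` wrapper forwards a `lora_request` keyword argument through `invoke()` to vLLM's underlying `generate()` call.

```python
# Minimal sketch, not part of the patch above. Assumes langchain_community's
# VLLM forwards the `lora_request` kwarg (a vllm.lora.request.LoRARequest)
# through invoke() to the underlying vLLM generate() call.
from langchain_community.llms import VLLM
from vllm.lora.request import LoRARequest

# Mirrors the configuration introduced by the diff: LoRA support must be
# enabled via vllm_kwargs, not as a top-level VLLM(...) argument.
llm = VLLM(
    model="meta-llama/Llama-3.2-3B-Instruct",
    max_new_tokens=300,
    temperature=0.1,
    vllm_kwargs={
        "gpu_memory_utilization": 0.5,
        "enable_lora": True,
        "max_model_len": 350,
    },
)

# The adapter itself is supplied per request.
LoRA_ADAPTER_PATH = "path/to/adapter"  # placeholder path, as in the notebook
lora_adapter = LoRARequest("lora_adapter", 1, LoRA_ADAPTER_PATH)

print(
    llm.invoke(
        "Summarize what this model can do.",  # illustrative prompt
        lora_request=lora_adapter,
    )
)
```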