From d36dc7427630c3e96e41f33ad8d3895e19b34ea2 Mon Sep 17 00:00:00 2001
From: Kdump
Date: Fri, 27 Sep 2024 10:44:24 +0800
Subject: [PATCH] add doc

---
 docs/vllm_integration.md | 66 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/docs/vllm_integration.md b/docs/vllm_integration.md
index 7d3205bb8..b99515246 100644
--- a/docs/vllm_integration.md
+++ b/docs/vllm_integration.md
@@ -23,3 +23,69 @@ See the supported models [here](https://vllm.readthedocs.io/en/latest/models/sup
'''
python3 -m fastchat.serve.vllm_worker --model-path TheBloke/vicuna-7B-v1.5-AWQ --quantization awq
'''

## Add vllm_worker support for lora_modules

### Usage

1. Start the worker with LoRA enabled. `--lora-modules` takes one or more `name=path` pairs, and each adapter name must also appear in `--model-names` so the controller can route requests to it. `VLLM_WORKER_MULTIPROC_METHOD=spawn` makes vLLM's multi-GPU workers use the `spawn` start method, since forking is unsafe once CUDA has been initialized.

```bash
export VLLM_WORKER_MULTIPROC_METHOD=spawn
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m fastchat.serve.vllm_worker \
  --model-path /data/models/Qwen/Qwen2-72B-Instruct \
  --tokenizer /data/models/Qwen/Qwen2-72B-Instruct \
  --enable-lora \
  --lora-modules m1=/data/modules/lora/adapter/m1 m2=/data/modules/lora/adapter/m2 m3=/data/modules/lora/adapter/m3 \
  --model-names qwen2-72b-instruct,m1,m2,m3 \
  --controller http://localhost:21001 \
  --host 0.0.0.0 \
  --num-gpus 8 \
  --port 31034 \
  --limit-worker-concurrency 100 \
  --worker-address http://localhost:31034
```

2. Send requests through the OpenAI-compatible API. Set `"model"` to an adapter name to run inference with that adapter, or to the base model name for the unadapted model. The placeholders `fastchat_address:port` and `sk-xxx` stand for your API server address and key (see the setup note after the examples).

- Example 1: query LoRA adapter `m1`

```bash
curl --location --request POST 'http://fastchat_address:port/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-xxx' \
--data-raw '{
    "model": "m1",
    "stream": false,
    "temperature": 0.7,
    "top_p": 0.1,
    "max_tokens": 4096,
    "messages": [
      {
        "role": "user",
        "content": "Hi?"
      }
    ]
  }'
```

- Example 2: query the base model

```bash
curl --location --request POST 'http://fastchat_address:port/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-xxx' \
--data-raw '{
    "model": "qwen2-72b-instruct",
    "stream": false,
    "temperature": 0.7,
    "top_p": 0.1,
    "max_tokens": 4096,
    "messages": [
      {
        "role": "user",
        "content": "Hi?"
      }
    ]
  }'
```
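Note that the curl examples above talk to FastChat's OpenAI-compatible API server, not to the worker itself, so `fastchat_address:port` is the API server's address. Besides the worker, a controller and the API server must therefore be running. A minimal sketch of that setup, assuming the default controller port used above and an API server on port 8000 (adjust both to your deployment):

```bash
# Run each in its own shell or under a process manager.
python3 -m fastchat.serve.controller --host 0.0.0.0 --port 21001
python3 -m fastchat.serve.openai_api_server --host 0.0.0.0 --port 8000 \
  --controller-address http://localhost:21001
```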
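Once the worker has registered with the controller, the base model and every adapter passed to `--model-names` should be listed by the models endpoint. A quick sanity check, reusing the address and key placeholders from the examples:

```bash
# Lists all model names currently registered with the controller,
# which should include qwen2-72b-instruct, m1, m2, and m3.
curl --location --request GET 'http://fastchat_address:port/v1/models' \
--header 'Authorization: Bearer sk-xxx'
```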
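Both examples set `"stream": false` and return a single JSON completion. The same endpoint also accepts `"stream": true`, in which case the response arrives as incremental server-sent-event chunks; a sketch with the same placeholders:

```bash
# Streamed variant: tokens arrive as "data: {...}" SSE chunks.
curl --location --request POST 'http://fastchat_address:port/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-xxx' \
--data-raw '{
    "model": "m1",
    "stream": true,
    "temperature": 0.7,
    "top_p": 0.1,
    "max_tokens": 4096,
    "messages": [
      {
        "role": "user",
        "content": "Hi?"
      }
    ]
  }'
```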