Skip to content

Commit

Permalink
feat(vllm): add support for image-to-text and video-to-text (mudler#3729
Browse files Browse the repository at this point in the history
)

* feat(vllm): add support for image-to-text

Related to mudler#3670

Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): add support for video-to-text

Closes: mudler#2318

Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): support CPU installations

Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): add bnb

Signed-off-by: Ettore Di Giacinto <[email protected]>

* chore: add docs reference

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Apply suggestions from code review

Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler authored and siddimore committed Oct 6, 2024
1 parent bb130ff commit 5b19cee
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 10 deletions.
73 changes: 68 additions & 5 deletions backend/python/vllm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import signal
import sys
import os
from typing import List
from PIL import Image

import backend_pb2
import backend_pb2_grpc
Expand All @@ -15,6 +17,8 @@
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.multimodal.utils import fetch_image
from vllm.assets.video import VideoAsset

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

Expand Down Expand Up @@ -105,6 +109,7 @@ async def LoadModel(self, request, context):
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
except Exception as err:
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

try:
Expand All @@ -117,7 +122,7 @@ async def LoadModel(self, request, context):
)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

print("Model loaded successfully", file=sys.stderr)
return backend_pb2.Result(message="Model loaded successfully", success=True)

async def Predict(self, request, context):
Expand Down Expand Up @@ -196,15 +201,33 @@ async def _predict(self, request, context, streaming=False):
if request.Seed != 0:
sampling_params.seed = request.Seed

# Extract image paths and process images
prompt = request.Prompt

# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template

image_paths = request.Images
image_data = [self.load_image(img_path) for img_path in image_paths]

videos_path = request.Videos
video_data = [self.load_video(video_path) for video_path in videos_path]

# If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)

# Generate text
# Generate text using the LLM engine
request_id = random_uuid()
outputs = self.llm.generate(prompt, sampling_params, request_id)
print(f"Generating text with request_id: {request_id}", file=sys.stderr)
outputs = self.llm.generate(
{
"prompt": prompt,
"multi_modal_data": {
"image": image_data if image_data else None,
"video": video_data if video_data else None,
} if image_data or video_data else None,
},
sampling_params=sampling_params,
request_id=request_id,
)

# Stream the results
generated_text = ""
Expand All @@ -227,9 +250,49 @@ async def _predict(self, request, context, streaming=False):
if streaming:
return

# Remove the image files from /tmp folder
for img_path in image_paths:
try:
os.remove(img_path)
except Exception as e:
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)

# Sending the final generated text
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

def load_image(self, image_path: str):
    """
    Load an image from the given file path.

    Args:
        image_path (str): The path to the image file.

    Returns:
        Image: The loaded image, or None if the file could not be opened.
    """
    try:
        return Image.open(image_path)
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        # Do not fall back to the video loader here: a path that failed
        # to open as an image must not end up in the image inputs as
        # video frames.
        print(f"Error loading image {image_path}: {e}", file=sys.stderr)
        return None

def load_video(self, video_path: str):
    """
    Load a video from the given file path.

    Args:
        video_path (str): The path to the video file.

    Returns:
        Video: The loaded video (the `np_ndarrays` of a VideoAsset),
        or None if the video could not be loaded.
    """
    try:
        return VideoAsset(name=video_path).np_ndarrays
    except Exception as e:
        # Log with video_path: the previous message referenced an
        # undefined name, which turned every load failure into a
        # NameError instead of the intended None result.
        print(f"Error loading video {video_path}: {e}", file=sys.stderr)
        return None

async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
Expand Down
16 changes: 15 additions & 1 deletion backend/python/vllm/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
# Empty BUILD_TYPE: build vLLM from a source checkout with the CPU target
# instead of calling installRequirements.
if [ "x${BUILD_TYPE}" == "x" ]; then
ensureVenv
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
# Clone only when no vllm directory is present.
if [ ! -d vllm ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
# Packages installed ahead of the checkout's requirements-cpu.txt.
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
# CPU requirements, with the PyTorch CPU wheel index as an extra index.
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
# VLLM_TARGET_DEVICE=cpu is set for the setup.py install step.
VLLM_TARGET_DEVICE=cpu python setup.py install
popd
# Remove the source checkout after the install step.
rm -rf vllm
else
# Any non-empty BUILD_TYPE goes through installRequirements.
installRequirements
fi
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-cublas11.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-cublas12.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-hipblas.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-intel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ accelerate
torch
transformers
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
bitsandbytes

0 comments on commit 5b19cee

Please sign in to comment.