Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(vllm): add support for image-to-text and video-to-text #3729

Merged
merged 7 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 68 additions & 5 deletions backend/python/vllm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import signal
import sys
import os
from typing import List
from PIL import Image

import backend_pb2
import backend_pb2_grpc
Expand All @@ -15,6 +17,8 @@
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.multimodal.utils import fetch_image
from vllm.assets.video import VideoAsset

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

Expand Down Expand Up @@ -105,6 +109,7 @@ async def LoadModel(self, request, context):
try:
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
except Exception as err:
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

try:
Expand All @@ -117,7 +122,7 @@ async def LoadModel(self, request, context):
)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

print("Model loaded successfully", file=sys.stderr)
return backend_pb2.Result(message="Model loaded successfully", success=True)

async def Predict(self, request, context):
Expand Down Expand Up @@ -196,15 +201,33 @@ async def _predict(self, request, context, streaming=False):
if request.Seed != 0:
sampling_params.seed = request.Seed

# Extract image paths and process images
prompt = request.Prompt

# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template

image_paths = request.Images
image_data = [self.load_image(img_path) for img_path in image_paths]

videos_path = request.Videos
video_data = [self.load_video(video_path) for video_path in videos_path]

# If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)

# Generate text
# Generate text using the LLM engine
request_id = random_uuid()
outputs = self.llm.generate(prompt, sampling_params, request_id)
print(f"Generating text with request_id: {request_id}", file=sys.stderr)
outputs = self.llm.generate(
{
"prompt": prompt,
"multi_modal_data": {
"image": image_data if image_data else None,
"video": video_data if video_data else None,
} if image_data or video_data else None,
},
sampling_params=sampling_params,
request_id=request_id,
)

# Stream the results
generated_text = ""
Expand All @@ -227,9 +250,49 @@ async def _predict(self, request, context, streaming=False):
if streaming:
return

# Remove the image files from /tmp folder
for img_path in image_paths:
try:
os.remove(img_path)
except Exception as e:
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)

# Sending the final generated text
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

def load_image(self, image_path: str):
    """
    Open the file at ``image_path`` as a PIL image.

    If opening fails for any reason, the error is written to stderr and the
    same path is handed to ``load_video`` instead, whose result is returned.

    Args:
        image_path (str): The path to the image file.

    Returns:
        The object returned by ``Image.open`` on success; otherwise whatever
        ``load_video`` returns for the same path.
    """
    try:
        opened = Image.open(image_path)
    except Exception as e:
        print(f"Error loading image {image_path}: {e}", file=sys.stderr)
    else:
        return opened
    # Only reached when the image could not be opened: retry the path as a video.
    return self.load_video(image_path)

def load_video(self, video_path: str):
    """
    Load a video from the given file path.

    Loading is best-effort: any failure is reported on stderr and ``None`` is
    returned instead of propagating the exception.

    Args:
        video_path (str): The path to the video file.

    Returns:
        The value of ``VideoAsset(name=video_path).np_ndarrays`` on success,
        or ``None`` if the video could not be loaded.
    """
    # NOTE(review): VideoAsset comes from vllm.assets — confirm that it accepts
    # arbitrary local paths and not only vLLM's bundled sample asset names.
    try:
        return VideoAsset(name=video_path).np_ndarrays
    except Exception as e:
        # The handler previously formatted `image_path`, which does not exist
        # in this method, so every failure turned into a NameError.
        print(f"Error loading video {video_path}: {e}", file=sys.stderr)
        return None

async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
Expand Down
16 changes: 15 additions & 1 deletion backend/python/vllm/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
# When BUILD_TYPE is empty, build and install vLLM from source for the CPU
# target; for any other BUILD_TYPE fall back to installRequirements.
if [ "x${BUILD_TYPE}" == "x" ]; then
ensureVenv
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
# Clone the vLLM sources unless a checkout is already present.
if [ ! -d vllm ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
# Build tooling and supporting packages, installed before vLLM's own requirements.
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
# vLLM's CPU requirement set, with the PyTorch CPU wheel index as an extra source.
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd
# Remove the source checkout once the install step has run.
rm -rf vllm
else
installRequirements
fi
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-cublas11.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-cublas12.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-hipblas.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
accelerate
torch
transformers
transformers
bitsandbytes
3 changes: 2 additions & 1 deletion backend/python/vllm/requirements-intel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ accelerate
torch
transformers
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
bitsandbytes
Loading