 
 from vllm.multimodal.utils import encode_image_base64, fetch_image
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
-MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
-LLAVA_CHAT_TEMPLATE = VLLM_PATH / "examples/template_llava.jinja"
-assert LLAVA_CHAT_TEMPLATE.exists()
+MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
+MAXIMUM_IMAGES = 2
 
 # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
 TEST_IMAGE_URLS = [

 @pytest.fixture(scope="module")
 def server():
     args = [
-        "--dtype",
-        "bfloat16",
-        "--max-model-len",
-        "4096",
-        "--enforce-eager",
-        "--chat-template",
-        str(LLAVA_CHAT_TEMPLATE),
+        "--dtype", "bfloat16", "--max-model-len", "4096", "--max-num-seqs",
+        "5", "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt",
+        f"image={MAXIMUM_IMAGES}"
     ]
 
     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
@@ -84,7 +79,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=596, total_tokens=606)
+        completion_tokens=10, prompt_tokens=772, total_tokens=782)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -139,7 +134,7 @@ async def test_single_chat_session_image_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=596, total_tokens=606)
+        completion_tokens=10, prompt_tokens=772, total_tokens=782)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -217,47 +212,53 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
+@pytest.mark.parametrize(
+    "image_urls",
+    [TEST_IMAGE_URLS[:i] for i in range(2, len(TEST_IMAGE_URLS))])
 async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
-                                 image_url: str):
+                                 image_urls: List[str]):
 
     messages = [{
         "role":
         "user",
         "content": [
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": image_url
-                }
-            },
-            {
+            *({
                 "type": "image_url",
                 "image_url": {
                     "url": image_url
                 }
-            },
+            } for image_url in image_urls),
             {
                 "type": "text",
                 "text": "What's in this image?"
             },
         ],
     }]
 
-    with pytest.raises(openai.BadRequestError):  # test multi-image input
-        await client.chat.completions.create(
+    if len(image_urls) > MAXIMUM_IMAGES:
+        with pytest.raises(openai.BadRequestError):  # test multi-image input
+            await client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                max_tokens=10,
+                temperature=0.0,
+            )
+
+        # the server should still work afterwards
+        completion = await client.completions.create(
+            model=model_name,
+            prompt=[0, 0, 0, 0, 0],
+            max_tokens=5,
+            temperature=0.0,
+        )
+        completion = completion.choices[0].text
+        assert completion is not None and len(completion) >= 0
+    else:
+        chat_completion = await client.chat.completions.create(
             model=model_name,
             messages=messages,
             max_tokens=10,
             temperature=0.0,
         )
-
-    # the server should still work afterwards
-    completion = await client.completions.create(
-        model=model_name,
-        prompt=[0, 0, 0, 0, 0],
-        max_tokens=5,
-        temperature=0.0,
-    )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
+        message = chat_completion.choices[0].message
+        assert message.content is not None and len(message.content) >= 0
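For reference, the request shape exercised by the new test is the standard OpenAI chat-completions payload with one image_url part per image. The sketch below is illustrative only and not part of the diff: it assumes a vLLM OpenAI-compatible server already running locally with --limit-mm-per-prompt image=2, and the base URL and image URLs are placeholders.

# Illustrative sketch (assumptions: local server at http://localhost:8000/v1,
# started with --limit-mm-per-prompt image=2; image URLs are placeholders).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

image_urls = [
    "https://example.com/first.jpg",   # placeholder
    "https://example.com/second.png",  # placeholder
]

chat = client.chat.completions.create(
    model="microsoft/Phi-3.5-vision-instruct",
    messages=[{
        "role": "user",
        "content": [
            # one image_url part per image, same shape as in the test above
            *({"type": "image_url", "image_url": {"url": url}}
              for url in image_urls),
            {"type": "text", "text": "What's in these images?"},
        ],
    }],
    max_tokens=10,
    temperature=0.0,
)
print(chat.choices[0].message.content)

# Sending more images than --limit-mm-per-prompt allows is expected to fail
# with a 400 (openai.BadRequestError), which is what the parametrized cases
# above the limit assert.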