diff --git a/examples/pooling/score/vision_rerank_api_online.py b/examples/pooling/score/vision_rerank_api_online.py index d63ef2781634..875971f1aef3 100644 --- a/examples/pooling/score/vision_rerank_api_online.py +++ b/examples/pooling/score/vision_rerank_api_online.py @@ -18,48 +18,32 @@ """ import argparse -import base64 -import json +import pprint import requests - -def encode_base64_content_from_url(content_url: str) -> dict[str, str]: - """Encode a content retrieved from a remote url to base64 format.""" - - with requests.get(content_url, headers=headers) as response: - response.raise_for_status() - result = base64.b64encode(response.content).decode("utf-8") - - return {"url": f"data:image/jpeg;base64,{result}"} - - -headers = {"accept": "application/json", "Content-Type": "application/json"} +from vllm.multimodal.utils import encode_image_url, fetch_image query = "A woman playing with her dog on a beach at sunset." -documents = { - "content": [ - { - "type": "text", - "text": ( - "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, " - "as the dog offers its paw in a heartwarming display of companionship and trust." - ), - }, - { - "type": "image_url", - "image_url": { - "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" - }, - }, - { - "type": "image_url", - "image_url": encode_base64_content_from_url( - "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" - ), - }, - ] -} +document = ( + "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, " + "as the dog offers its paw in a heartwarming display of companionship and trust." +) +image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" +documents = [ + { + "type": "text", + "text": document, + }, + { + "type": "image_url", + "image_url": {"url": image_url}, + }, + { + "type": "image_url", + "image_url": {"url": encode_image_url(fetch_image(image_url))}, + }, +] def parse_args(): @@ -74,23 +58,36 @@ def main(args): models_url = base_url + "/v1/models" rerank_url = base_url + "/rerank" - response = requests.get(models_url, headers=headers) + response = requests.get(models_url) model = response.json()["data"][0]["id"] - data = { + print("Query: string & Document: list of string") + prompt = {"model": model, "query": query, "documents": [document]} + response = requests.post(rerank_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: text") + prompt = {"model": model, "query": query, "documents": {"content": [documents[0]]}} + response = requests.post(rerank_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: image url") + prompt = { + "model": model, + "query": query, + "documents": {"content": [documents[1]]}, + } + response = requests.post(rerank_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: image base64") + prompt = { "model": model, "query": query, - "documents": documents, + "documents": {"content": [documents[2]]}, } - response = requests.post(rerank_url, headers=headers, json=data) - - # Check the response - if response.status_code == 200: - print("Request successful!") - print(json.dumps(response.json(), indent=2)) - else: - print(f"Request failed with status code: {response.status_code}") - print(response.text) + response = requests.post(rerank_url, json=prompt) + pprint.pprint(response.json()) if __name__ == "__main__": diff --git a/examples/pooling/score/vision_score_api_online.py b/examples/pooling/score/vision_score_api_online.py index 01041b846325..df8218a8b2ad 100644 --- a/examples/pooling/score/vision_score_api_online.py +++ b/examples/pooling/score/vision_score_api_online.py @@ -17,48 +17,32 @@ """ import argparse -import base64 -import json import pprint import requests - -def encode_base64_content_from_url(content_url: str) -> dict[str, str]: - """Encode a content retrieved from a remote url to base64 format.""" - - with requests.get(content_url, headers=headers) as response: - response.raise_for_status() - result = base64.b64encode(response.content).decode("utf-8") - - return {"url": f"data:image/jpeg;base64,{result}"} - - -headers = {"accept": "application/json", "Content-Type": "application/json"} - -queries = "slm markdown" -documents = { - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png" - }, - }, - { - "type": "image_url", - "image_url": { - "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png" - }, - }, - { - "type": "image_url", - "image_url": encode_base64_content_from_url( - "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png" - ), - }, - ] -} +from vllm.multimodal.utils import encode_image_url, fetch_image + +query = "A woman playing with her dog on a beach at sunset." +document = ( + "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, " + "as the dog offers its paw in a heartwarming display of companionship and trust." +) +image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" +documents = [ + { + "type": "text", + "text": document, + }, + { + "type": "image_url", + "image_url": {"url": image_url}, + }, + { + "type": "image_url", + "image_url": {"url": encode_image_url(fetch_image(image_url))}, + }, +] def parse_args(): @@ -73,15 +57,40 @@ def main(args): models_url = base_url + "/v1/models" score_url = base_url + "/score" - response = requests.get(models_url, headers=headers) + response = requests.get(models_url) model = response.json()["data"][0]["id"] - prompt = {"model": model, "queries": queries, "documents": documents} - response = requests.post(score_url, headers=headers, json=prompt) - print("\nPrompt when queries is string and documents is a image list:") - pprint.pprint(prompt) - print("\nScore Response:") - print(json.dumps(response.json(), indent=2)) + print("Query: string & Document: string") + prompt = {"model": model, "queries": query, "documents": document} + response = requests.post(score_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: text") + prompt = { + "model": model, + "queries": query, + "documents": {"content": [documents[0]]}, + } + response = requests.post(score_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: image url") + prompt = { + "model": model, + "queries": query, + "documents": {"content": [documents[1]]}, + } + response = requests.post(score_url, json=prompt) + pprint.pprint(response.json()) + + print("Query: string & Document: image base64") + prompt = { + "model": model, + "queries": query, + "documents": {"content": [documents[2]]}, + } + response = requests.post(score_url, json=prompt) + pprint.pprint(response.json()) if __name__ == "__main__": diff --git a/tests/entrypoints/pooling/classify/test_online_vision.py b/tests/entrypoints/pooling/classify/test_online_vision.py index 215921374238..312bb6fe531c 100644 --- a/tests/entrypoints/pooling/classify/test_online_vision.py +++ b/tests/entrypoints/pooling/classify/test_online_vision.py @@ -5,9 +5,9 @@ import pytest import requests -from tests.entrypoints.test_utils import encode_base64_content_from_url from tests.utils import RemoteOpenAIServer from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse +from vllm.multimodal.utils import encode_image_url, fetch_image MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls" MAXIMUM_VIDEOS = 1 @@ -19,7 +19,7 @@ } input_text = "This product was excellent and exceeded my expectations" image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg" -image_base64 = encode_base64_content_from_url(image_url) +image_base64 = {"url": encode_image_url(fetch_image(image_url))} video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4" diff --git a/tests/entrypoints/pooling/score/test_online_score_vision.py b/tests/entrypoints/pooling/score/test_online_score_vision.py new file mode 100644 index 000000000000..0f19498c05ba --- /dev/null +++ b/tests/entrypoints/pooling/score/test_online_score_vision.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +import requests + +from tests.utils import VLLM_PATH, RemoteOpenAIServer +from vllm.entrypoints.pooling.score.protocol import ScoreResponse +from vllm.multimodal.utils import encode_image_url, fetch_image + +MODEL_NAME = "Qwen/Qwen3-VL-Reranker-2B" +HF_OVERRIDES = { + "architectures": ["Qwen3VLForSequenceClassification"], + "classifier_from_token": ["no", "yes"], + "is_original_qwen3_reranker": True, +} + +query = "A cat standing in the snow." +image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg" +documents = [ + { + "type": "text", + "text": query, + }, + { + "type": "image_url", + "image_url": {"url": image_url}, + }, + { + "type": "image_url", + "image_url": {"url": encode_image_url(fetch_image(image_url))}, + }, +] + + +@pytest.fixture(scope="module") +def server(): + args = [ + "--enforce-eager", + "--max-model-len", + "8192", + "--chat-template", + str(VLLM_PATH / "examples/pooling/score/template/qwen3_vl_reranker.jinja"), + ] + + with RemoteOpenAIServer( + MODEL_NAME, args, override_hf_configs=HF_OVERRIDES + ) as remote_server: + yield remote_server + + +def test_score_api_queries_str_documents_str(server: RemoteOpenAIServer): + queries = "What is the capital of France?" + documents = "The capital of France is Paris." + + score_response = requests.post( + server.url_for("score"), + json={ + "model": MODEL_NAME, + "queries": queries, + "documents": documents, + }, + ) + score_response.raise_for_status() + score = ScoreResponse.model_validate(score_response.json()) + + assert score.id is not None + assert score.data is not None + assert len(score.data) == 1 + + +def test_score_api_queries_str_documents_text_content(server: RemoteOpenAIServer): + score_response = requests.post( + server.url_for("score"), + json={ + "model": MODEL_NAME, + "queries": query, + "documents": {"content": [documents[0]]}, + }, + ) + score_response.raise_for_status() + score = ScoreResponse.model_validate(score_response.json()) + + assert score.id is not None + assert score.data is not None + assert len(score.data) == 1 + + +def test_score_api_queries_str_documents_image_url_content(server: RemoteOpenAIServer): + score_response = requests.post( + server.url_for("score"), + json={ + "model": MODEL_NAME, + "queries": query, + "documents": {"content": [documents[1]]}, + }, + ) + score_response.raise_for_status() + score = ScoreResponse.model_validate(score_response.json()) + + assert score.id is not None + assert score.data is not None + assert len(score.data) == 1 + + +def test_score_api_queries_str_documents_image_base64_content( + server: RemoteOpenAIServer, +): + score_response = requests.post( + server.url_for("score"), + json={ + "model": MODEL_NAME, + "queries": query, + "documents": {"content": [documents[2]]}, + }, + ) + score_response.raise_for_status() + score = ScoreResponse.model_validate(score_response.json()) + + assert score.id is not None + assert score.data is not None + assert len(score.data) == 1 diff --git a/tests/entrypoints/test_utils.py b/tests/entrypoints/test_utils.py index 1f1b54267511..dc1101840645 100644 --- a/tests/entrypoints/test_utils.py +++ b/tests/entrypoints/test_utils.py @@ -1,9 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import base64 - -import requests - from vllm.entrypoints.utils import sanitize_message @@ -12,11 +8,3 @@ def test_sanitize_message(): sanitize_message("<_io.BytesIO object at 0x7a95e299e750>") == "<_io.BytesIO object>" ) - - -def encode_base64_content_from_url(content_url: str) -> dict[str, str]: - with requests.get(content_url) as response: - response.raise_for_status() - result = base64.b64encode(response.content).decode("utf-8") - - return {"url": f"data:image/jpeg;base64,{result}"} diff --git a/vllm/model_executor/models/adapters.py b/vllm/model_executor/models/adapters.py index a67f1bbe994c..8c10c6ddc4ba 100644 --- a/vllm/model_executor/models/adapters.py +++ b/vllm/model_executor/models/adapters.py @@ -466,6 +466,7 @@ def load_weights_using_from_2_way_softmax( language_model = _get_language_model_for_seq_cls(model) is_vlm = language_model is not model + using_vlm_head = is_vlm and hasattr(language_model, "score") language_model.lm_head = ParallelLMHead( text_config.vocab_size, text_config.hidden_size, quant_config=quant_config @@ -506,14 +507,16 @@ def load_weights_using_from_2_way_softmax( torch.float32 ) - lm_head_weight.data[[false_id]].to(torch.float32) - score_layer = language_model.score if is_vlm else model.score + score_layer = language_model.score if using_vlm_head else model.score param = score_layer.weight weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, score_weight) del language_model.lm_head - score_weight_name = "language_model.score.weight" if is_vlm else "score.weight" + score_weight_name = ( + "language_model.score.weight" if using_vlm_head else "score.weight" + ) loaded_weights.add(score_weight_name) lm_head_name = "lm_head.weight" @@ -537,6 +540,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te language_model = _get_language_model_for_seq_cls(model) is_vlm = language_model is not model + using_vlm_head = is_vlm and hasattr(language_model, "score") language_model.lm_head = ParallelLMHead( text_config.vocab_size, text_config.hidden_size, quant_config=quant_config @@ -572,14 +576,16 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens] score_weight = language_model.lm_head.weight.data[token_ids] - score_layer = language_model.score if is_vlm else model.score + score_layer = language_model.score if using_vlm_head else model.score param = score_layer.weight weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, score_weight) del language_model.lm_head - score_weight_name = "language_model.score.weight" if is_vlm else "score.weight" + score_weight_name = ( + "language_model.score.weight" if using_vlm_head else "score.weight" + ) loaded_weights.add(score_weight_name) lm_head_name = "lm_head.weight"