Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions tests/entrypoints/pooling/classify/test_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import torch

from tests.models.utils import softmax
from vllm import LLM, ClassificationRequestOutput, PoolingParams, PoolingRequestOutput
from vllm import LLM, ClassificationRequestOutput, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.tasks import PoolingTask

Expand Down Expand Up @@ -66,15 +66,6 @@ def test_list_prompts(llm: LLM):
assert len(outputs[i].outputs.probs) == num_labels


@pytest.mark.skip_global_cleanup
def test_token_classify(llm: LLM):
    """Token-level classification yields one row of logits per prompt token."""
    results = llm.encode(prompt, pooling_task="token_classify", use_tqdm=False)
    assert len(results) == 1
    first = results[0]
    assert isinstance(first, PoolingRequestOutput)
    assert first.prompt_token_ids == prompt_token_ids
    expected_shape = (len(prompt_token_ids), num_labels)
    assert first.outputs.data.shape == expected_shape


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):
def get_outputs(use_activation):
Expand Down Expand Up @@ -107,8 +98,12 @@ def test_score_api(llm: LLM):
llm.score("ping", "pong", use_tqdm=False)


@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
@pytest.mark.parametrize("task", ["token_classify", "embed", "token_embed"])
def test_unsupported_tasks(llm: LLM, task: PoolingTask):
err_msg = f"Unsupported task: '{task}' Supported tasks.+"
if task == "token_classify":
err_msg = "Try switching the model's pooling_task via.+"
else:
err_msg = "Embedding API is not supported by this model.+"

with pytest.raises(ValueError, match=err_msg):
llm.encode(prompt, pooling_task=task, use_tqdm=False)
29 changes: 8 additions & 21 deletions tests/entrypoints/pooling/classify/test_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,26 +436,7 @@ async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
task = "token_classify"
response = requests.post(
server.url_for("pooling"),
json={
"model": model_name,
"input": input_text,
"encoding_format": "float",
"task": task,
},
)
poolings = PoolingResponse.model_validate(response.json())
assert len(poolings.data) == 1
assert len(poolings.data[0].data) == 8
assert len(poolings.data[0].data[0]) == 2


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
@pytest.mark.parametrize("task", ["token_classify", "embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
server: RemoteOpenAIServer, model_name: str, task: str
):
Expand All @@ -469,4 +450,10 @@ async def test_pooling_not_supported(
},
)
assert response.json()["error"]["type"] == "BadRequestError"
assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")

if task == "token_classify":
err_msg = "Try switching the model's pooling_task via"
else:
err_msg = f"Unsupported task: {task!r}"

assert response.json()["error"]["message"].startswith(err_msg)
52 changes: 44 additions & 8 deletions tests/entrypoints/pooling/embed/test_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
import torch
import torch.nn.functional as F

from vllm import LLM, PoolingParams
from vllm import LLM, EmbeddingRequestOutput, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.platforms import current_platform
from vllm.tasks import PoolingTask

MODEL_NAME = "intfloat/multilingual-e5-small"

prompts = ["The chef prepared a delicious meal."]
prompt = "The chef prepared a delicious meal."
prompt_token_ids = [0, 581, 21861, 133888, 10, 8, 150, 60744, 109911, 5, 2]
embedding_size = 384


@pytest.fixture(scope="module")
Expand All @@ -35,25 +38,47 @@ def llm():
seed=0,
attention_config=attention_config,
)
assert embedding_size == llm.model_config.embedding_size

yield weakref.proxy(llm)

del llm

cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_token_embed(llm: LLM):
outputs = llm.encode(prompts, pooling_task="token_embed", use_tqdm=False)
multi_vector = outputs[0].outputs.data
assert multi_vector.shape == (11, 384)
def test_str_prompts(llm: LLM):
outputs = llm.embed(prompt, use_tqdm=False)
assert len(outputs) == 1
assert isinstance(outputs[0], EmbeddingRequestOutput)
assert outputs[0].prompt_token_ids == prompt_token_ids
assert len(outputs[0].outputs.embedding) == embedding_size


@pytest.mark.skip_global_cleanup
def test_token_ids_prompts(llm: LLM):
    """Embedding a pre-tokenized prompt round-trips the token ids."""
    results = llm.embed([prompt_token_ids], use_tqdm=False)
    assert len(results) == 1
    out = results[0]
    assert isinstance(out, EmbeddingRequestOutput)
    assert out.prompt_token_ids == prompt_token_ids
    assert len(out.outputs.embedding) == embedding_size


@pytest.mark.skip_global_cleanup
def test_list_prompts(llm: LLM):
    """A mixed string / token-id batch embeds to identically shaped outputs."""
    results = llm.embed([prompt, prompt_token_ids], use_tqdm=False)
    assert len(results) == 2
    # Both prompts encode the same text, so every field should match.
    for out in results:
        assert isinstance(out, EmbeddingRequestOutput)
        assert out.prompt_token_ids == prompt_token_ids
        assert len(out.outputs.embedding) == embedding_size


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):
def get_outputs(normalize):
outputs = llm.embed(
prompts,
[prompt],
pooling_params=PoolingParams(use_activation=normalize),
use_tqdm=False,
)
Expand All @@ -70,3 +95,14 @@ def get_outputs(normalize):
assert torch.allclose(w_normal, F.normalize(wo_normal, p=2, dim=-1), atol=1e-2), (
"w_normal should be close to normal(wo_normal)."
)


@pytest.mark.parametrize("task", ["token_embed", "classify", "token_classify"])
def test_unsupported_tasks(llm: LLM, task: PoolingTask):
    """Pooling tasks other than the configured one are rejected with a clear error."""
    err_msg = (
        "Try switching the model's pooling_task via.+"
        if task == "token_embed"
        else "Classification API is not supported by this model.+"
    )
    with pytest.raises(ValueError, match=err_msg):
        llm.encode(prompt, pooling_task=task, use_tqdm=False)
33 changes: 10 additions & 23 deletions tests/entrypoints/pooling/embed/test_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,28 +732,9 @@ async def test_pooling_embed(server: RemoteOpenAIServer, model_name: str):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_embed(server: RemoteOpenAIServer, model_name: str):
task = "token_embed"
response = requests.post(
server.url_for("pooling"),
json={
"model": model_name,
"input": input_text,
"encoding_format": "float",
"task": task,
},
)

poolings = PoolingResponse.model_validate(response.json())

assert len(poolings.data) == 1
assert len(poolings.data[0].data) == len(input_tokens)
assert len(poolings.data[0].data[0]) == 384


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["classify", "token_classify", "plugin"])
@pytest.mark.parametrize(
"task", ["token_embed", "classify", "token_classify", "plugin"]
)
async def test_pooling_not_supported(
server: RemoteOpenAIServer, model_name: str, task: str
):
Expand All @@ -767,4 +748,10 @@ async def test_pooling_not_supported(
},
)
assert response.json()["error"]["type"] == "BadRequestError"
assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")

if task == "token_embed":
err_msg = "Try switching the model's pooling_task via"
else:
err_msg = f"Unsupported task: {task!r}"

assert response.json()["error"]["message"].startswith(err_msg)
24 changes: 7 additions & 17 deletions tests/entrypoints/pooling/score/test_online_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,22 +203,7 @@ async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
response = requests.post(
server.url_for("pooling"),
json={"model": model_name, "input": input_text, "encoding_format": "float"},
)

poolings = PoolingResponse.model_validate(response.json())

assert len(poolings.data) == 1
assert len(poolings.data[0].data) == len(input_tokens)
assert len(poolings.data[0].data[0]) == 1


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
@pytest.mark.parametrize("task", ["token_classify", "embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
server: RemoteOpenAIServer, model_name: str, task: str
):
Expand All @@ -232,4 +217,9 @@ async def test_pooling_not_supported(
},
)
assert response.json()["error"]["type"] == "BadRequestError"
assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")
if task == "token_classify":
err_msg = "Try switching the model's pooling_task via"
else:
err_msg = f"Unsupported task: {task!r}"

assert response.json()["error"]["message"].startswith(err_msg)
Empty file.
76 changes: 76 additions & 0 deletions tests/entrypoints/pooling/token_classify/test_offline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import weakref

import pytest

from vllm import LLM, PoolingRequestOutput
from vllm.config import PoolerConfig
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.tasks import PoolingTask

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

prompt = "The chef prepared a delicious meal."
prompt_token_ids = [785, 29706, 10030, 264, 17923, 15145, 13]
num_labels = 2


@pytest.fixture(scope="module")
def llm():
    """Module-scoped LLM configured for the ``token_classify`` pooling task."""
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(
        model=MODEL_NAME,
        pooler_config=PoolerConfig(pooling_task="token_classify"),
        max_num_batched_tokens=32768,
        tensor_parallel_size=1,
        gpu_memory_utilization=0.75,
        enforce_eager=True,
        seed=0,
    )

    yield weakref.proxy(llm)

    # Drop the only strong reference so the engine can be collected before
    # the distributed environment is torn down.
    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_str_prompts(llm: LLM):
    """A plain-string prompt produces per-token classification logits."""
    results = llm.encode(prompt, pooling_task="token_classify", use_tqdm=False)
    assert len(results) == 1
    out = results[0]
    assert isinstance(out, PoolingRequestOutput)
    assert out.prompt_token_ids == prompt_token_ids
    expected_shape = (len(prompt_token_ids), num_labels)
    assert out.outputs.data.shape == expected_shape


@pytest.mark.skip_global_cleanup
def test_token_ids_prompts(llm: LLM):
    """A pre-tokenized prompt produces per-token classification logits."""
    results = llm.encode(
        [prompt_token_ids], pooling_task="token_classify", use_tqdm=False
    )
    assert len(results) == 1
    out = results[0]
    assert isinstance(out, PoolingRequestOutput)
    assert out.prompt_token_ids == prompt_token_ids
    expected_shape = (len(prompt_token_ids), num_labels)
    assert out.outputs.data.shape == expected_shape


@pytest.mark.skip_global_cleanup
def test_score_api(llm: LLM):
    """score() is rejected because this model exposes more than one label."""
    expected = "Score API is only enabled for num_labels == 1."
    with pytest.raises(ValueError, match=expected):
        llm.score("ping", "pong", use_tqdm=False)


@pytest.mark.parametrize("task", ["classify", "embed", "token_embed"])
def test_unsupported_tasks(llm: LLM, task: PoolingTask):
    """Pooling tasks other than token_classify are rejected with a clear error."""
    err_msg = (
        "Try switching the model's pooling_task via.+"
        if task == "classify"
        else "Embedding API is not supported by this model.+"
    )
    with pytest.raises(ValueError, match=err_msg):
        llm.encode(prompt, pooling_task=task, use_tqdm=False)
72 changes: 72 additions & 0 deletions tests/entrypoints/pooling/token_classify/test_online.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest
import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
DTYPE = "float32" # Use float32 to avoid NaN issue
input_text = "This product was excellent and exceeded my expectations"
input_tokens = [1986, 1985, 572, 9073, 323, 33808, 847, 16665]


@pytest.fixture(scope="module")
def server():
    """Module-scoped OpenAI-compatible server running in token_classify pooling mode."""
    args = [
        "--enforce-eager",
        "--max-model-len",
        "512",
        "--dtype",
        DTYPE,
        "--pooler-config",
        '{"pooling_task": "token_classify"}',
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    """The /pooling endpoint returns one row of logits per input token.

    Asserts against ``len(input_tokens)`` instead of a hard-coded ``8`` so the
    test stays consistent with the sibling online tests and keeps working if
    ``input_text`` / ``input_tokens`` are ever updated together.
    """
    num_labels = 2  # the test model is a binary classifier
    task = "token_classify"
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    poolings = PoolingResponse.model_validate(response.json())
    assert len(poolings.data) == 1
    # One row per token of `input_text`, one column per label.
    assert len(poolings.data[0].data) == len(input_tokens)
    assert len(poolings.data[0].data[0]) == num_labels


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["classify", "embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    """Every pooling task other than token_classify yields a BadRequestError."""
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    body = response.json()
    assert body["error"]["type"] == "BadRequestError"

    # classify shares the pooling head, so the server suggests reconfiguring;
    # the remaining tasks are simply unsupported.
    err_msg = (
        "Try switching the model's pooling_task via"
        if task == "classify"
        else f"Unsupported task: {task!r}"
    )
    assert body["error"]["message"].startswith(err_msg)
Empty file.
Loading
Loading