Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 100 additions & 17 deletions docs/serving/openai_compatible_server.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,15 @@ The following [sampling parameters](../api/README.md#inference-parameters) are s
??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:completion-sampling-params"
--8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-sampling-params"
```
Comment on lines 199 to 201
Copy link
Copy Markdown
Collaborator Author

@noooop noooop Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @chaunceyjiang

Fixed some API documentation; there might still be some that need to be fixed.


The following extra parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:completion-extra-params"
--8<-- "vllm/entrypoints/openai/completion/protocol.py:completion-extra-params"
```

### Chat API
Expand All @@ -228,15 +228,15 @@ The following [sampling parameters](../api/README.md#inference-parameters) are s
??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:chat-completion-sampling-params"
--8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-sampling-params"
```

The following extra parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:chat-completion-extra-params"
--8<-- "vllm/entrypoints/openai/chat_completion/protocol.py:chat-completion-extra-params"
```

### Responses API
Expand All @@ -253,15 +253,15 @@ The following extra parameters in the request object are supported:
??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:responses-extra-params"
--8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-extra-params"
```

The following extra parameters in the response object are supported:

??? code

```python
--8<-- "vllm/entrypoints/openai/protocol.py:responses-response-extra-params"
--8<-- "vllm/entrypoints/openai/responses/protocol.py:responses-response-extra-params"
```

### Embeddings API
Expand Down Expand Up @@ -378,23 +378,53 @@ The following [pooling parameters][vllm.PoolingParams] are supported.

```python
--8<-- "vllm/pooling_params.py:common-pooling-params"
--8<-- "vllm/pooling_params.py:embedding-pooling-params"
--8<-- "vllm/pooling_params.py:embed-pooling-params"
```

The following extra parameters are supported by default:
The following Embeddings API parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/pooling/embed/protocol.py:embedding-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:completion-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:encoding-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:embed-params"
```

For chat-like input (i.e. if `messages` is passed), these extra parameters are supported instead:
The following extra parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:completion-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:encoding-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:embed-extra-params"
```

For chat-like input (i.e. if `messages` is passed), the following parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:chat-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:encoding-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:embed-params"
```

For chat-like input, the following extra parameters are supported instead:

??? code

```python
--8<-- "vllm/entrypoints/pooling/embed/protocol.py:chat-embedding-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:chat-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:encoding-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:embed-extra-params"
```

### Transcriptions API
Expand Down Expand Up @@ -659,14 +689,48 @@ The following [pooling parameters][vllm.PoolingParams] are supported.

```python
--8<-- "vllm/pooling_params.py:common-pooling-params"
--8<-- "vllm/pooling_params.py:classification-pooling-params"
--8<-- "vllm/pooling_params.py:classify-pooling-params"
```

The following Classification API parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:completion-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-params"
```

The following extra parameters are supported:

```python
--8<-- "vllm/entrypoints/pooling/classify/protocol.py:classification-extra-params"
```
??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:completion-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
```

For chat-like input (i.e. if `messages` is passed), the following parameters are supported:

??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:chat-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-params"
```

For chat-like input, the following extra parameters are supported instead:

??? code

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:chat-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
```

### Score API

Expand Down Expand Up @@ -882,12 +946,21 @@ The following [pooling parameters][vllm.PoolingParams] are supported.

```python
--8<-- "vllm/pooling_params.py:common-pooling-params"
--8<-- "vllm/pooling_params.py:classification-pooling-params"
--8<-- "vllm/pooling_params.py:classify-pooling-params"
```

The following Score API parameters are supported:

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/score/protocol.py:score-extra-params"
```

The following extra parameters are supported:

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
--8<-- "vllm/entrypoints/pooling/score/protocol.py:score-extra-params"
```

Expand Down Expand Up @@ -963,12 +1036,22 @@ The following [pooling parameters][vllm.PoolingParams] are supported.

```python
--8<-- "vllm/pooling_params.py:common-pooling-params"
--8<-- "vllm/pooling_params.py:classification-pooling-params"
--8<-- "vllm/pooling_params.py:classify-pooling-params"
```

The following Re-rank API parameters are supported:

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
--8<-- "vllm/entrypoints/pooling/score/protocol.py:score-extra-params"
```

The following extra parameters are supported:

```python
--8<-- "vllm/entrypoints/pooling/base/protocol.py:pooling-common-extra-params"
--8<-- "vllm/entrypoints/pooling/base/protocol.py:classify-extra-params"
--8<-- "vllm/entrypoints/pooling/score/protocol.py:rerank-extra-params"
```

Expand Down
4 changes: 2 additions & 2 deletions examples/pooling/score/convert_model_to_seq_cls.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,9 @@ def parse_args():
help="Conversion method to use",
)
parser.add_argument(
"--use-pad-token",
"--use-sep-token",
action="store_true",
help="Enable padding token in the sequence classification model",
help="Enable separating token in the sequence classification model",
)
parser.add_argument(
"--path",
Expand Down
91 changes: 73 additions & 18 deletions tests/entrypoints/pooling/classify/test_online_vision.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json

import pytest
import requests

from tests.entrypoints.test_utils import encode_base64_content_from_url
from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse

VLM_MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
MAXIMUM_VIDEOS = 1
TEST_VIDEO_URL = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"

HF_OVERRIDES = {
"text_config": {
"architectures": ["Qwen2_5_VLForSequenceClassification"],
},
}
input_text = "This product was excellent and exceeded my expectations"
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
image_base64 = encode_base64_content_from_url(image_url)
video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"


@pytest.fixture(scope="module")
def server_vlm_classify():
def server():
args = [
"--runner",
"pooling",
Expand All @@ -33,26 +36,80 @@ def server_vlm_classify():
]

with RemoteOpenAIServer(
VLM_MODEL_NAME, args, override_hf_configs=HF_OVERRIDES
MODEL_NAME, args, override_hf_configs=HF_OVERRIDES
) as remote_server:
yield remote_server


@pytest.mark.parametrize("model_name", [VLM_MODEL_NAME])
def test_classify_accepts_chat_text_only(
server_vlm_classify: RemoteOpenAIServer, model_name: str
) -> None:
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_chat_text_request(server: RemoteOpenAIServer, model_name: str):
    """Classify a text-only chat conversation through the `classify` route.

    Posts an assistant turn followed by a user turn carrying `input_text`,
    validates the JSON reply as a `ClassificationResponse`, and checks the
    response envelope, the number of returned probabilities, and the
    reported prompt token count.
    """
    assistant_turn = {
        "role": "assistant",
        "content": "Please classify this text request.",
    }
    user_turn = {"role": "user", "content": input_text}
    payload = {"model": model_name, "messages": [assistant_turn, user_turn]}

    http_reply = requests.post(server.url_for("classify"), json=payload)
    # Surface HTTP-level failures before attempting to parse the body.
    http_reply.raise_for_status()

    result = ClassificationResponse.model_validate(http_reply.json())

    assert result.object == "list"
    assert result.model == model_name
    assert len(result.data) == 1
    assert len(result.data[0].probs) == 2
    assert result.usage.prompt_tokens == 35


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_chat_image_url_request(server: RemoteOpenAIServer, model_name: str):
    """Classify a chat message that pairs a text part with an image URL.

    Sends a single user turn whose content holds a text instruction and an
    `image_url` part pointing at `image_url`, validates the JSON reply as a
    `ClassificationResponse`, and checks the response envelope, the number
    of returned probabilities, and the reported prompt token count.
    """
    text_part = {"type": "text", "text": "Please classify this image."}
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    payload = {
        "model": model_name,
        "messages": [{"role": "user", "content": [text_part, image_part]}],
    }

    http_reply = requests.post(server.url_for("classify"), json=payload)
    # Surface HTTP-level failures before attempting to parse the body.
    http_reply.raise_for_status()

    result = ClassificationResponse.model_validate(http_reply.json())

    assert result.object == "list"
    assert result.model == model_name
    assert len(result.data) == 1
    assert len(result.data[0].probs) == 2
    assert result.usage.prompt_tokens == 47


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_chat_image_base64_request(server: RemoteOpenAIServer, model_name: str):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Please classify this text request."},
{"type": "text", "text": "Please classify this image."},
{"type": "image_url", "image_url": image_base64},
],
}
]

response = requests.post(
server_vlm_classify.url_for("classify"),
server.url_for("classify"),
json={"model": model_name, "messages": messages},
)
response.raise_for_status()
Expand All @@ -63,25 +120,23 @@ def test_classify_accepts_chat_text_only(
assert output.model == model_name
assert len(output.data) == 1
assert len(output.data[0].probs) == 2
assert output.usage.prompt_tokens == 22
assert output.usage.prompt_tokens == 47


@pytest.mark.parametrize("model_name", [VLM_MODEL_NAME])
def test_classify_accepts_chat_video_url(
server_vlm_classify: RemoteOpenAIServer, model_name: str
) -> None:
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_chat_video_url_request(server: RemoteOpenAIServer, model_name: str):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Please classify this video."},
{"type": "video_url", "video_url": {"url": TEST_VIDEO_URL}},
{"type": "video_url", "video_url": {"url": video_url}},
],
}
]

response = requests.post(
server_vlm_classify.url_for("classify"),
server.url_for("classify"),
json={"model": model_name, "messages": messages},
)
response.raise_for_status()
Expand Down
Loading