Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/python-bindings-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,22 @@ jobs:

py-check:
runs-on: ubuntu-latest
strategy:
matrix:
vllm-version: ["0.14.1", "0.15.1"]
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist
- name: ty
- name: ty (vllm==${{ matrix.vllm-version }})
shell: bash
run: |
set -e
python3 -m venv .venv
source .venv/bin/activate
pip install ty vllm==0.14.1
pip install ty vllm==${{ matrix.vllm-version }}
pip install cohere_melody --find-links dist --force-reinstall
ty check cohere_melody_vllm/
48 changes: 36 additions & 12 deletions cohere_melody_vllm/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,45 @@
Wraps the melody functionality into vLLM parsers for reasoning and tool calls.
"""

from typing import Optional, Sequence, Union, TYPE_CHECKING
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
ResponsesRequest,
DeltaMessage,
DeltaToolCall,
DeltaFunctionCall,
ExtractedToolCallInformation,
FunctionCall,
ToolCall,
)
from importlib.metadata import version as _get_version
from typing import TYPE_CHECKING, Optional, Sequence, Union

from packaging.version import Version as _Version

from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.tool_parsers import ToolParser, ToolParserManager
from vllm.transformers_utils.tokenizer import AnyTokenizer

# vllm > 0.14.1 reorganized OpenAI entrypoint imports (https://github.com/vllm-project/vllm/pull/32240)
_VLLM_POST_0_14_1 = _Version(_get_version("vllm")) > _Version("0.14.1")
Comment thread
shun-cohere marked this conversation as resolved.
if _VLLM_POST_0_14_1:
from vllm.entrypoints.openai.chat_completion.protocol import ( # ty: ignore[unresolved-import]
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import ( # ty: ignore[unresolved-import]
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.responses.protocol import ( # ty: ignore[unresolved-import]
ResponsesRequest,
)
else:
from vllm.entrypoints.openai.protocol import ( # ty: ignore[unresolved-import]
ChatCompletionRequest,
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
ResponsesRequest,
ToolCall,
)


try:
from cohere_melody import PyFilter, PyFilterOptions # type: ignore

Expand Down Expand Up @@ -122,7 +146,7 @@ def extract_content_ids(self, input_ids: list[int]) -> list[int]:
content_ids.extend(token_ids)
return content_ids

def is_reasoning_end(self, input_ids: list[int]) -> bool:
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
Comment thread
shun-cohere marked this conversation as resolved.
end_token_id = self.model_tokenizer.convert_tokens_to_ids("<|END_THINKING|>")
return any(input_id == end_token_id for input_id in reversed(input_ids))

Expand Down
Loading