Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/python-bindings-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,22 @@ jobs:

py-check:
runs-on: ubuntu-latest
strategy:
matrix:
vllm-version: ["0.14.1", "0.15.1"]
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist
- name: ty
- name: ty (vllm==${{ matrix.vllm-version }})
shell: bash
run: |
set -e
python3 -m venv .venv
source .venv/bin/activate
pip install ty vllm==0.14.1
pip install ty vllm==${{ matrix.vllm-version }}
pip install cohere_melody --find-links dist --force-reinstall
ty check cohere_melody_vllm/
48 changes: 36 additions & 12 deletions cohere_melody_vllm/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,45 @@
Wraps the melody functionality into vLLM parsers for reasoning and tool calls.
"""

from typing import Optional, Sequence, Union, TYPE_CHECKING
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
ResponsesRequest,
DeltaMessage,
DeltaToolCall,
DeltaFunctionCall,
ExtractedToolCallInformation,
FunctionCall,
ToolCall,
)
from importlib.metadata import version as _get_version
from typing import TYPE_CHECKING, Optional, Sequence, Union

from packaging.version import Version as _Version

from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.tool_parsers import ToolParser, ToolParserManager
from vllm.transformers_utils.tokenizer import AnyTokenizer

# vllm > 0.14.1 reorganized OpenAI entrypoint imports (https://github.com/vllm-project/vllm/pull/32240)
_VLLM_POST_0_14_1 = _Version(_get_version("vllm")) > _Version("0.14.1")
Comment thread
shun-cohere marked this conversation as resolved.
if _VLLM_POST_0_14_1:
from vllm.entrypoints.openai.chat_completion.protocol import ( # ty: ignore[unresolved-import]
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import ( # ty: ignore[unresolved-import]
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.responses.protocol import ( # ty: ignore[unresolved-import]
ResponsesRequest,
)
else:
from vllm.entrypoints.openai.protocol import ( # ty: ignore[unresolved-import]
ChatCompletionRequest,
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
ResponsesRequest,
ToolCall,
)


try:
from cohere_melody import PyFilter, PyFilterOptions # type: ignore

Expand Down Expand Up @@ -122,7 +146,7 @@ def extract_content_ids(self, input_ids: list[int]) -> list[int]:
content_ids.extend(token_ids)
return content_ids

def is_reasoning_end(self, input_ids: list[int]) -> bool:
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
Comment thread
shun-cohere marked this conversation as resolved.
end_token_id = self.model_tokenizer.convert_tokens_to_ids("<|END_THINKING|>")
return any(input_id == end_token_id for input_id in reversed(input_ids))

Expand Down
Loading