vllm-project · vllm-bot · Mar 12, 2026 · Mar 12, 2026 · hickeyma · Mar 12, 2026
@@ -4,6 +4,7 @@
 
 import contextlib
 import json
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any, overload
 
@@ -299,7 +300,9 @@ def encode(
             tokens = self._maybe_truncate(tokens, max_length)
         return tokens
 
-    def decode(self, ids: list[int] | int, skip_special_tokens: bool = False) -> str:
+    def decode(
+        self, ids: Sequence[int] | int, skip_special_tokens: bool = False
+    ) -> str:
         """Decode token IDs to text, optionally skipping special tokens."""
         if isinstance(ids, int):
             ids = [ids]
@@ -321,7 +324,7 @@ def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list[int]:
         return [self._token_to_id.get(token, self._unk_token_id) for token in tokens]
 
     def convert_ids_to_tokens(
-        self, ids: list[int], skip_special_tokens: bool = False
+        self, ids: Sequence[int], skip_special_tokens: bool = False
     ) -> list[str]:
         tokens = []
         for token_id in ids:

@@ -68,7 +68,7 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
                 # tool_choice: "Forced Function" or "required" will override
                 # structured output json settings to make tool calling work correctly
                 request.structured_outputs = StructuredOutputsParams(
-                    json=json_schema_from_tool
+                    json=json_schema_from_tool  # type: ignore[call-arg]
                 )
                 request.response_format = None
             if isinstance(request, ResponsesRequest):