Fix type-checker errors in the OpenAI API server and RPC client

njhill · njhill · commit f58f49fd783f · 2024-09-03T15:08:07.000-07:00
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
@@ -67,7 +67,7 @@
 
 
 def model_is_embedding(model_name: str, trust_remote_code: bool,
-                       quantization: str) -> bool:
+                       quantization: Optional[str]) -> bool:
     return ModelConfig(model=model_name,
                        tokenizer=model_name,
                        tokenizer_mode="auto",
@@ -108,7 +108,7 @@ async def build_async_engine_client(
     async with build_async_engine_client_from_engine_args(
             engine_args, args.disable_frontend_multiprocessing) as engine:
 
-        async_engine_client = engine
+        async_engine_client = engine  # type: ignore[assignment]
         yield engine
 
 
@@ -189,7 +189,7 @@ async def build_async_engine_client_from_engine_args(
                         yield None
                         return
 
-            yield rpc_client
+            yield rpc_client  # type: ignore[misc]
         finally:
             # Ensure rpc server process was terminated
             rpc_server_process.terminate()
diff --git a/vllm/entrypoints/openai/rpc/client.py b/vllm/entrypoints/openai/rpc/client.py
@@ -7,6 +7,7 @@
 import cloudpickle
 import zmq
 import zmq.asyncio
+from zmq import Frame  # type: ignore[attr-defined]
 from zmq.asyncio import Socket
 
 from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
@@ -214,6 +215,7 @@ async def _send_get_data_rpc_request(self, request: RPCUtilityRequest,
 
             # Await the data from the Server.
             frame = await socket.recv(copy=False)
+            assert isinstance(frame, Frame)
             data = pickle.loads(frame.buffer)
 
         if isinstance(data, Exception):
@@ -247,6 +249,7 @@ async def do_rpc_call(socket: Socket, request: RPC_REQUEST_TYPE):
                                    f"{self._data_timeout} ms")
 
             frame = await socket.recv(copy=False)
+            assert isinstance(frame, Frame)
             return pickle.loads(frame.buffer)
 
         # Make a new socket connection.
@@ -395,6 +398,7 @@ async def generate(
                 # Stream back the results from the RPC Server.
                 while not finished:
                     message = await socket.recv(copy=False)
+                    assert isinstance(message, Frame)
                     request_output = pickle.loads(message.buffer)
 
                     if isinstance(request_output, Exception):