Merged
@@ -310,10 +310,11 @@ def _listen_for_requests(self):
            elif data["cmd"] == "PUT":
                tensor_id = data["tensor_id"]
                try:
-                   tensor = torch.empty(data["shape"],
-                                        dtype=getattr(torch, data["dtype"]),
-                                        device=self.device)
+                   with torch.cuda.stream(self.recv_stream):
+                       tensor = torch.empty(data["shape"],
+                                            dtype=getattr(torch, data["dtype"]),
+                                            device=self.device)
Comment on lines +313 to +317
Contributor

critical

This change is a crucial fix for a race condition.

By placing the `torch.empty` call inside the `recv_stream` context, the tensor's memory allocation is properly ordered with respect to the subsequent `ncclRecv` call, which also runs on `self.recv_stream`.

Without this, the allocation would be issued on the default CUDA stream. If the default stream and `recv_stream` are not synchronized, `ncclRecv` could begin writing to the tensor's memory before the allocation completes, producing the data corruption and garbled output described in the pull request. Serializing both operations on the same stream is essential for correctness and stability under load.
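The stream-ordering pattern the diff applies can be sketched in isolation. This is a minimal illustration, not the PR's actual code: `make_recv_buffer` and its parameters are hypothetical names, and the stream context is only entered when a CUDA stream is supplied, so the sketch also runs on CPU-only machines.

```python
import torch

def make_recv_buffer(shape, dtype_name, device, recv_stream=None):
    """Allocate a receive buffer, on recv_stream when one is given.

    Hypothetical helper illustrating the fix: issuing torch.empty on the
    same CUDA stream that the later receive uses means the allocation is
    ordered before any write into the buffer on that stream.
    """
    dtype = getattr(torch, dtype_name)
    if recv_stream is not None and str(device).startswith("cuda"):
        # Allocation and the subsequent recv are serialized on recv_stream,
        # so the recv cannot start writing before allocation completes.
        with torch.cuda.stream(recv_stream):
            return torch.empty(shape, dtype=dtype, device=device)
    # CPU (or no-stream) fallback: no stream ordering is involved.
    return torch.empty(shape, dtype=dtype, device=device)

# Usage on CPU for illustration; on GPU you would pass a torch.cuda.Stream().
buf = make_recv_buffer([2, 3], "float32", "cpu")
```

The key point is that `torch.empty` alone gives no guarantee about ordering relative to work on a non-default stream; wrapping it in `torch.cuda.stream(recv_stream)` is what ties the allocation to the receive.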

                    self.router_socket.send_multipart(
                        [remote_address, b"0"])
                    comm, rank = self.comms[remote_address.decode()]