From 3f333adbbd13ac6737e14e7b77063a18755e8fae Mon Sep 17 00:00:00 2001
From: Lukas Geiger
Date: Wed, 28 May 2025 23:33:16 +0100
Subject: [PATCH 1/2] [Bugfix] Ensure tensors are contiguous during serialisation

Signed-off-by: Lukas Geiger
---
 vllm/v1/serial_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py
index fbd38fc47203..410320a1881d 100644
--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -158,6 +158,8 @@ def _encode_tensor(
         self, obj: torch.Tensor
     ) -> tuple[str, tuple[int, ...], Union[int, memoryview]]:
         assert self.aux_buffers is not None
+        # this creates a copy of the tensor if it's not already contiguous
+        obj = obj.contiguous()
         # view the tensor as a 1D array of bytes
         arr = obj.flatten().view(torch.uint8).numpy()
         if obj.nbytes < self.size_threshold:

From 1bc3bf38a8427b4efbdcce35272b94b51b145480 Mon Sep 17 00:00:00 2001
From: Lukas Geiger
Date: Thu, 29 May 2025 00:28:21 +0100
Subject: [PATCH 2/2] Force contiguous data after flatten

Signed-off-by: Lukas Geiger
---
 vllm/v1/serial_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py
index 410320a1881d..78f37c1e8b21 100644
--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -158,10 +158,8 @@ def _encode_tensor(
         self, obj: torch.Tensor
     ) -> tuple[str, tuple[int, ...], Union[int, memoryview]]:
         assert self.aux_buffers is not None
-        # this creates a copy of the tensor if it's not already contiguous
-        obj = obj.contiguous()
-        # view the tensor as a 1D array of bytes
-        arr = obj.flatten().view(torch.uint8).numpy()
+        # view the tensor as a contiguous 1D array of bytes
+        arr = obj.flatten().contiguous().view(torch.uint8).numpy()
         if obj.nbytes < self.size_threshold:
             # Smaller tensors are encoded inline, just like ndarrays.
             data = msgpack.Ext(CUSTOM_TYPE_RAW_VIEW, arr.data)