From 6b9e73d514997f7306b7e2373f33c8f4c4d3b7ae Mon Sep 17 00:00:00 2001
From: Max <58072309+MK-986123@users.noreply.github.com>
Date: Wed, 23 Jul 2025 11:18:01 -0700
Subject: [PATCH] fix: ensure writable numpy arrays in GGUF loader to prevent
 PyTorch VRAM spikes

---
 invokeai/backend/quantization/gguf/loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 178c0508466..118cbb8822a 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -12,7 +12,7 @@ def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTens
 
     sd: dict[str, GGMLTensor] = {}
     for tensor in reader.tensors:
-        torch_tensor = torch.from_numpy(tensor.data)
+        torch_tensor = torch.from_numpy(tensor.data.copy() if not tensor.data.flags.writeable else tensor.data)
         shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
         if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
             torch_tensor = torch_tensor.view(*shape)
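
Note (not part of the patch): a minimal standalone sketch of the failure mode
this change guards against. The array below is illustrative, standing in for a
read-only, memory-mapped GGUF tensor buffer; names like `buf` are hypothetical.

import numpy as np
import torch

buf = np.arange(4, dtype=np.float32)
buf.flags.writeable = False  # mimic a read-only, memory-mapped buffer

# Unpatched behavior: torch.from_numpy() on a read-only array emits a
# UserWarning, and writing through the resulting tensor is undefined.
# The patched line copies only when the source is read-only, so arrays
# that are already writable stay zero-copy:
safe = buf.copy() if not buf.flags.writeable else buf
t = torch.from_numpy(safe)  # writable tensor, no warning
t += 1                      # in-place ops are now safe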