From 8866a22dcbe9a4c6278ebcc8f58542522e31f19e Mon Sep 17 00:00:00 2001
From: Rattus
Date: Sat, 27 Sep 2025 19:37:48 +1000
Subject: [PATCH 1/2] flux: math: Use addcmul_ to avoid expensive VRAM intermediate

The rope computation can be the VRAM peak, and allocating a separate
intermediate for the addition result before the originals are released
can OOM. Accumulate in place with addcmul_ instead.
---
 comfy/ldm/flux/math.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py
index fb7cd75861f8..8deda0d4acbf 100644
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@@ -37,7 +37,10 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
 
 def apply_rope1(x: Tensor, freqs_cis: Tensor):
     x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
-    x_out = freqs_cis[..., 0] * x_[..., 0] + freqs_cis[..., 1] * x_[..., 1]
+
+    x_out = freqs_cis[..., 0] * x_[..., 0]
+    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+
     return x_out.reshape(*x.shape).type_as(x)
 
 def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):

From 98ca6030f36eee897a3cea1c958837e7d9385acb Mon Sep 17 00:00:00 2001
From: Rattus
Date: Sat, 27 Sep 2025 21:38:40 +1000
Subject: [PATCH 2/2] wan: Delete the self-attention output before cross attention

Freeing y once it has been folded into x saves VRAM when the cross
attention and FFN are the VRAM peak.
---
 comfy/ldm/wan/model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index 2dac5980cd99..54f61a80794e 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -237,6 +237,7 @@ def forward(
             freqs, transformer_options=transformer_options)
 
         x = torch.addcmul(x, y, repeat_e(e[2], x))
+        del y
 
         # cross-attention & ffn
         x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len, transformer_options=transformer_options)
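
Note (editorial, not part of the patch series): below is a minimal, self-contained sketch of the two VRAM-saving patterns used above, written against plain PyTorch with made-up tensor names and shapes rather than anything from the repository. It assumes only standard torch APIs (elementwise mul, addcmul_/addcmul, and the CUDA peak-memory counters) and is meant to show why the in-place form peaks lower and why dropping a reference before the next large op helps the caching allocator.

import torch


def mix_naive(a, b, c, d):
    # a*b + c*d materialises both products and their sum at the same time,
    # so three full-size temporaries are live at the peak.
    return a * b + c * d


def mix_inplace(a, b, c, d):
    # The first product doubles as the output buffer; addcmul_ fuses the
    # second multiply-add in place, so only one full-size temporary is live.
    out = a * b
    out.addcmul_(c, d)
    return out


if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    shape = (2, 1024, 32, 64)  # hypothetical, just big enough to show a difference
    a, b, c, d = (torch.randn(shape, device=device) for _ in range(4))

    # Same numerical result either way.
    print("allclose:", torch.allclose(mix_naive(a, b, c, d), mix_inplace(a, b, c, d)))

    if device == "cuda":
        torch.cuda.reset_peak_memory_stats()
        mix_naive(a, b, c, d)
        torch.cuda.synchronize()
        naive_peak = torch.cuda.max_memory_allocated()

        torch.cuda.reset_peak_memory_stats()
        mix_inplace(a, b, c, d)
        torch.cuda.synchronize()
        inplace_peak = torch.cuda.max_memory_allocated()
        print(f"peak bytes: naive={naive_peak}, in-place={inplace_peak}")

    # The second patch's pattern: once an intermediate has been folded into x,
    # deleting the last reference lets the caching allocator reuse its block for
    # the work that follows instead of raising the peak.
    y = a * b                  # stand-in for the self-attention output
    x = torch.addcmul(c, y, d)
    del y                      # y is not needed past this point
    x = x + torch.relu(x)      # stand-in for the subsequent cross-attention / FFN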