diff --git a/docs/user_guide/diffusion/cpu_offload_diffusion.md b/docs/user_guide/diffusion/cpu_offload_diffusion.md
index be72efffa5..f80005ccb7 100644
--- a/docs/user_guide/diffusion/cpu_offload_diffusion.md
+++ b/docs/user_guide/diffusion/cpu_offload_diffusion.md
@@ -139,11 +139,15 @@ Factory function `get_offload_backend()` selects the appropriate backend based o
 
 ## Supported Models
 
-| Architecture | Example Models | DiT Class | Model-Level Offload | Layerwise Offload | Blocks Attr (Layerwise specific) |
-|--------------|----------------|-----------|---------------------|-------------------|-------------|
-| Wan22Pipeline | `Wan-AI/Wan2.2-T2V-A14B-Diffusers` | `WanTransformer3DModel` | ✓ | ✓ | `"blocks"` |
-| Wan22I2VPipeline | `Wan-AI/Wan2.2-I2V-A14B-Diffusers` | `WanTransformer3DModel` | ✓ | ✓ | `"blocks"` |
+| Architecture | Example Models | DiT Class | Model-Level Offload | Layerwise Offload | Blocks Attrs (Layerwise specific) |
+|--------------|----------------|-----------|---------------------|-------------------|-----------------------------------|
+| LongCatImagePipeline | `meituan-longcat/LongCat-Image` | `LongCatImageTransformer2DModel` | - | ✓ | `"transformer_blocks"`, `"single_transformer_blocks"` |
+| NextStep11Pipeline | `stepfun-ai/NextStep-1.1` | `NextStepModel` | - | ✓ | `"layers"` |
+| OvisImagePipeline | `AIDC-AI/Ovis-Image-7B` | `OvisImageTransformer2DModel` | - | ✓ | `"transformer_blocks"`, `"single_transformer_blocks"` |
 | QwenImagePipeline | `Qwen/Qwen-Image` | `QwenImageTransformer2DModel` | ✓ | ✓ | `"transformer_blocks"` |
+| StableDiffusion3Pipeline | `stabilityai/stable-diffusion-3.5-medium` | `SD3Transformer2DModel` | - | ✓ | `"transformer_blocks"` |
+| Wan22I2VPipeline | `Wan-AI/Wan2.2-I2V-A14B-Diffusers` | `WanTransformer3DModel` | ✓ | ✓ | `"blocks"` |
+| Wan22Pipeline | `Wan-AI/Wan2.2-T2V-A14B-Diffusers` | `WanTransformer3DModel` | ✓ | ✓ | `"blocks"` |
 
 **Notes:**
 - Model-Level Offloading is expected to be supported by all common diffusion models (DiT and encoders) naturally
diff --git a/docs/user_guide/diffusion_features.md b/docs/user_guide/diffusion_features.md
index d4d9ce6a3d..2f04e35687 100644
--- a/docs/user_guide/diffusion_features.md
+++ b/docs/user_guide/diffusion_features.md
@@ -107,19 +107,19 @@ The following tables show which models support each feature:
 | **FLUX.2-dev** | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
 | **GLM-Image** | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
 | **HunyuanImage3** | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
-| **LongCat-Image** | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
-| **LongCat-Image-Edit** | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| **LongCat-Image** | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
+| **LongCat-Image-Edit** | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
 | **MagiHuman** | ❌ | ❌ | ❌ | ❓ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
 | **MammothModa2(T2I)** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
-| **Nextstep_1(T2I)** | ❓ | ❓ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| **Nextstep_1(T2I)** | ❓ | ❓ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
 | **OmniGen2** | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
-| **Ovis-Image** | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| **Ovis-Image** | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
 | **Qwen-Image** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ✅ | ✅ |
 | **Qwen-Image-2512** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ✅ | ✅ |
 | **Qwen-Image-Edit** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ❌ | ❌ |
 | **Qwen-Image-Edit-2509** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ✅ | ❌ | ❌ |
 | **Qwen-Image-Layered** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ❌ | ❌ |
-| **Stable-Diffusion3.5** | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ (decode) | ❌ | ❌ |
+| **Stable-Diffusion3.5** | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ (decode) | ❌ | ❌ |
 | **Z-Image** | ✅ | ✅ | ✅ | ❓ | ✅ (TP=2 only) | ✅ | ❌ | ✅ (decode) | ✅ | ❌ |
 
 > Notes:
diff --git a/examples/offline_inference/text_to_image/text_to_image.py b/examples/offline_inference/text_to_image/text_to_image.py
index 42e44abb89..615e4067ed 100644
--- a/examples/offline_inference/text_to_image/text_to_image.py
+++ b/examples/offline_inference/text_to_image/text_to_image.py
@@ -376,7 +376,7 @@ def main():
         f"vae_patch_parallel_size={args.vae_patch_parallel_size}, "
         f"enable_expert_parallel={args.enable_expert_parallel}."
     )
-    print(f" CPU offload: {args.enable_cpu_offload}")
+    print(f" CPU offload: {args.enable_cpu_offload}; CPU Layerwise Offload: {args.enable_layerwise_offload}")
     print(f" Image size: {args.width}x{args.height}")
     if args.lora_path:
         print(f" LoRA: scale={args.lora_scale}")
diff --git a/vllm_omni/diffusion/models/longcat_image/longcat_image_transformer.py b/vllm_omni/diffusion/models/longcat_image/longcat_image_transformer.py
index 8d8e523d60..8f0ff446af 100644
--- a/vllm_omni/diffusion/models/longcat_image/longcat_image_transformer.py
+++ b/vllm_omni/diffusion/models/longcat_image/longcat_image_transformer.py
@@ -582,6 +582,7 @@ class LongCatImageTransformer2DModel(nn.Module):
     """
 
     _repeated_blocks = ["LongCatImageTransformerBlock", "LongCatImageSingleTransformerBlock"]
+    _layerwise_offload_blocks_attrs = ["transformer_blocks", "single_transformer_blocks"]
 
     # Sequence Parallelism for LongCat (following diffusers' _cp_plan pattern)
     _sp_plan = {
diff --git a/vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep.py b/vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep.py
index ded3079265..d2b3eb81e3 100644
--- a/vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep.py
+++ b/vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep.py
@@ -114,6 +114,8 @@ def from_json(cls, path: str) -> NextStepConfig:
 
 
 class NextStepModel(nn.Module):
+    _layerwise_offload_blocks_attrs = ["layers"]
+
     def __init__(self, config: NextStepConfig):
         super().__init__()
         self.config = config
diff --git a/vllm_omni/diffusion/models/ovis_image/ovis_image_transformer.py b/vllm_omni/diffusion/models/ovis_image/ovis_image_transformer.py
index bd2a3b4834..0e98729c3d 100644
--- a/vllm_omni/diffusion/models/ovis_image/ovis_image_transformer.py
+++ b/vllm_omni/diffusion/models/ovis_image/ovis_image_transformer.py
@@ -366,6 +366,7 @@ class OvisImageTransformer2DModel(nn.Module):
     """
 
     _repeated_blocks = ["OvisImageTransformerBlock", "OvisImageSingleTransformerBlock"]
+    _layerwise_offload_blocks_attrs = ["transformer_blocks", "single_transformer_blocks"]
 
     def __init__(
         self,
diff --git a/vllm_omni/diffusion/models/sd3/sd3_transformer.py b/vllm_omni/diffusion/models/sd3/sd3_transformer.py
index 308bd35a13..89f0615775 100644
--- a/vllm_omni/diffusion/models/sd3/sd3_transformer.py
+++ b/vllm_omni/diffusion/models/sd3/sd3_transformer.py
@@ -387,6 +387,7 @@ class SD3Transformer2DModel(nn.Module):
     """
 
     _repeated_blocks = ["SD3TransformerBlock"]
+    _layerwise_offload_blocks_attrs = ["transformer_blocks"]
 
     def __init__(
         self,
diff --git a/vllm_omni/diffusion/offloader/module_collector.py b/vllm_omni/diffusion/offloader/module_collector.py
index d9d21b939a..a09a337001 100644
--- a/vllm_omni/diffusion/offloader/module_collector.py
+++ b/vllm_omni/diffusion/offloader/module_collector.py
@@ -21,7 +21,7 @@ class PipelineModules:
 
 class ModuleDiscovery:
     """Discovers pipeline components for offloading"""
 
-    DIT_ATTRS = ["transformer", "transformer_2", "dit", "sr_dit", "language_model", "transformer_blocks"]
+    DIT_ATTRS = ["transformer", "transformer_2", "dit", "sr_dit", "language_model", "transformer_blocks", "model"]
     ENCODER_ATTRS = ["text_encoder", "text_encoder_2", "text_encoder_3", "image_encoder"]
     VAE_ATTRS = ["vae", "audio_vae"]
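
Below is a minimal sketch of how a layerwise offload backend can consume the `_layerwise_offload_blocks_attrs` attribute this diff adds to each DiT class. The names `ToyTransformer`, `collect_offload_blocks`, and `install_layerwise_offload` are illustrative, not vllm_omni's actual offloader API, and the synchronous `.to()` calls stand in for what a real backend (such as the one selected by `get_offload_backend()`) would more likely do asynchronously, prefetching the next block on a separate CUDA stream to overlap copies with compute.

```python
import torch
import torch.nn as nn


class ToyTransformer(nn.Module):
    """Stand-in for a DiT such as SD3Transformer2DModel; only the
    ModuleList named by _layerwise_offload_blocks_attrs matters here."""

    _layerwise_offload_blocks_attrs = ["transformer_blocks"]

    def __init__(self, num_blocks: int = 4, dim: int = 64):
        super().__init__()
        self.transformer_blocks = nn.ModuleList(
            nn.Linear(dim, dim) for _ in range(num_blocks)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for block in self.transformer_blocks:
            x = block(x)
        return x


def collect_offload_blocks(model: nn.Module) -> list[nn.Module]:
    """Flatten every ModuleList named by the model's
    _layerwise_offload_blocks_attrs into one list of blocks."""
    blocks: list[nn.Module] = []
    for attr in getattr(model, "_layerwise_offload_blocks_attrs", []):
        blocks.extend(getattr(model, attr))
    return blocks


def install_layerwise_offload(model: nn.Module, device: torch.device) -> None:
    """Keep block weights on CPU between uses: copy each block to
    `device` just before its forward pass and evict it just after."""

    def prefetch(module: nn.Module, args):
        module.to(device)
        # Return None so the hook leaves the forward inputs untouched.

    def evict(module: nn.Module, args, output):
        module.to("cpu")

    for block in collect_offload_blocks(model):
        block.register_forward_pre_hook(prefetch)
        block.register_forward_hook(evict)


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ToyTransformer()  # weights stay on CPU
    install_layerwise_offload(model, device)
    out = model(torch.randn(2, 64, device=device))
    print(out.shape)  # torch.Size([2, 64]); at most one block resident on GPU
```

The class-attribute convention keeps the offloader model-agnostic: just as `ModuleDiscovery.DIT_ATTRS` locates the DiT by probing a fixed list of pipeline attribute names (now including `"model"` for `NextStepModel`), `_layerwise_offload_blocks_attrs` tells the layerwise backend which containers hold the per-layer blocks to swap, with no per-model wiring.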