Commit 3620357: clean up

1 parent b658618

5 files changed (+27, -26 lines)

src/diffusers/loaders/__init__.py

Lines changed: 4 additions & 4 deletions

@@ -54,11 +54,11 @@ def text_encoder_attn_modules(text_encoder):
 _import_structure = {}

 if is_torch_available():
-    _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
-    _import_structure["utils"] = ["AttnProcsLayers"]
-    _import_structure["controlnet"] = ["FromOriginalControlnetMixin"]
     _import_structure["autoencoder"] = ["FromOriginalVAEMixin"]

+    _import_structure["controlnet"] = ["FromOriginalControlNetMixin"]
+    _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
+    _import_structure["utils"] = ["AttnProcsLayers"]
     if is_transformers_available():
         _import_structure["single_file"] = ["FromSingleFileMixin"]
         _import_structure["lora"] = ["LoraLoaderMixin", "StableDiffusionXLLoraLoaderMixin"]
@@ -71,7 +71,7 @@ def text_encoder_attn_modules(text_encoder):
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .autoencoder import FromOriginalVAEMixin
-        from .controlnet import FromOriginalControlnetMixin
+        from .controlnet import FromOriginalControlNetMixin
         from .unet import UNet2DConditionLoadersMixin
         from .utils import AttnProcsLayers

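Downstream imports must follow the rename. A minimal sketch of the new import path, assuming a diffusers build that includes this commit (the old `FromOriginalControlnetMixin` spelling is no longer exported):

```py
# Both single-file mixins are re-exported from diffusers.loaders after this change.
from diffusers.loaders import FromOriginalControlNetMixin, FromOriginalVAEMixin

# Each mixin contributes a `from_single_file` classmethod to the models that inherit it.
print(hasattr(FromOriginalControlNetMixin, "from_single_file"))  # True
print(hasattr(FromOriginalVAEMixin, "from_single_file"))         # True
```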
src/diffusers/loaders/autoencoder.py

Lines changed: 16 additions & 13 deletions

@@ -22,14 +22,14 @@

 class FromOriginalVAEMixin:
     """
-    Load pretrained ControlNet weights saved in the `.ckpt` or `.safetensors` format into a [`ControlNetModel`].
+    Load pretrained AutoencoderKL weights saved in the `.ckpt` or `.safetensors` format into a [`ControlNetModel`].
     """

     @classmethod
     @validate_hf_hub_args
     def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
         r"""
-        Instantiate a [`ControlNetModel`] from pretrained ControlNet weights saved in the original `.ckpt` or
+        Instantiate a [`AutoencoderKL`] from pretrained ControlNet weights saved in the original `.ckpt` or
         `.safetensors` format. The pipeline is set in evaluation mode (`model.eval()`) by default.

         Parameters:
@@ -62,32 +62,35 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
             revision (`str`, *optional*, defaults to `"main"`):
                 The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                 allowed by Git.
+            image_size (`int`, *optional*, defaults to 512):
+                The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
+                Diffusion v2 base model. Use 768 for Stable Diffusion v2.
             use_safetensors (`bool`, *optional*, defaults to `None`):
                 If set to `None`, the safetensors weights are downloaded if they're available **and** if the
                 safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
                 weights. If set to `False`, safetensors weights are not loaded.
-            image_size (`int`, *optional*, defaults to 512):
-                The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
-                Diffusion v2 base model. Use 768 for Stable Diffusion v2.
-            upcast_attention (`bool`, *optional*, defaults to `None`):
-                Whether the attention computation should always be upcasted.
             kwargs (remaining dictionary of keyword arguments, *optional*):
                 Can be used to overwrite load and saveable variables (for example the pipeline components of the
                 specific pipeline class). The overwritten components are directly passed to the pipelines `__init__`
                 method. See example below for more information.

+        <Tip warning={true}>
+
+        Make sure to pass both `image_size` and `scaling_factor` to `from_single_file()` if you're loading
+        a VAE from SDXL or a Stable Diffusion v2 model or higher.
+
+        </Tip>
+
         Examples:

         ```py
-        from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
+        from diffusers import AutoencoderKL

-        url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"  # can also be a local path
-        model = ControlNetModel.from_single_file(url)
-
-        url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors"  # can also be a local path
-        pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet)
+        url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"  # can also be local file
+        model = AutoencoderKL.from_single_file(url)
         ```
         """
+
         original_config_file = kwargs.pop("original_config_file", None)
         resume_download = kwargs.pop("resume_download", False)
         force_download = kwargs.pop("force_download", False)

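The new tip is the user-facing side of the `scaling_factor` plumbing added in `single_file_utils.py` below. A short usage sketch; the SDXL URL and the concrete `image_size`/`scaling_factor` values are illustrative assumptions, not taken from this commit:

```py
from diffusers import AutoencoderKL

# SD v1-style VAE: the defaults are fine (same checkpoint as the docstring example above).
url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"
vae = AutoencoderKL.from_single_file(url)

# SDXL VAE: per the tip, pass both image_size and scaling_factor explicitly.
# These values are illustrative for an SDXL checkpoint.
sdxl_url = "https://huggingface.co/stabilityai/sdxl-vae/blob/main/sdxl_vae.safetensors"
sdxl_vae = AutoencoderKL.from_single_file(sdxl_url, image_size=1024, scaling_factor=0.13025)
```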
src/diffusers/loaders/controlnet.py

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
 )


-class FromOriginalControlnetMixin:
+class FromOriginalControlNetMixin:
     """
     Load pretrained ControlNet weights saved in the `.ckpt` or `.safetensors` format into a [`ControlNetModel`].
     """

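Apart from the renamed class, loading ControlNet weights is unchanged. A sketch that reuses the example removed from the VAE docstring above:

```py
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# Original ControlNet checkpoint (can also be a local path).
url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
controlnet = ControlNetModel.from_single_file(url)

# Plug it into a single-file Stable Diffusion pipeline.
pipe_url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors"
pipe = StableDiffusionControlNetPipeline.from_single_file(pipe_url, controlnet=controlnet)
```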
src/diffusers/loaders/single_file_utils.py

Lines changed: 4 additions & 6 deletions

@@ -507,7 +507,7 @@ def create_controlnet_diffusers_config(original_config, image_size: int):
     return controlnet_config


-def create_vae_diffusers_config(original_config, image_size: int):
+def create_vae_diffusers_config(original_config, image_size, scaling_factor=0.18125):
     """
     Creates a config for the diffusers based on the config of the LDM model.
     """
@@ -526,6 +526,7 @@ def create_vae_diffusers_config(original_config, image_size: int):
         "block_out_channels": tuple(block_out_channels),
         "latent_channels": vae_params["z_channels"],
         "layers_per_block": vae_params["num_res_blocks"],
+        "scaling_factor": scaling_factor,
     }

     return config
@@ -1134,17 +1135,14 @@ def create_diffusers_unet_model_from_ldm(


 def create_diffusers_vae_model_from_ldm(
-    pipeline_class_name,
-    original_config,
-    checkpoint,
-    image_size=None,
+    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=0.18125
 ):
     # import here to avoid circular imports
     from ..models import AutoencoderKL

     image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size)

-    vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
+    vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor)
     diffusers_format_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
     ctx = init_empty_weights if is_accelerate_available() else nullcontext

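For context on why `scaling_factor` now travels into the VAE config: pipelines read that config value to scale latents to and from the range the UNet expects. A tiny, randomly initialized sketch; the shapes and the 0.18215 factor are illustrative and unrelated to any real checkpoint:

```py
import torch
from diffusers import AutoencoderKL

# Tiny random VAE just to show where scaling_factor ends up: it is stored on the
# model config and applied when moving between image space and latent space.
vae = AutoencoderKL(
    block_out_channels=(32,),
    down_block_types=("DownEncoderBlock2D",),
    up_block_types=("UpDecoderBlock2D",),
    latent_channels=4,
    sample_size=32,
    scaling_factor=0.18215,  # illustrative; SD v1-style checkpoints use this value
)

image = torch.randn(1, 3, 32, 32)
latents = vae.encode(image).latent_dist.sample() * vae.config.scaling_factor
decoded = vae.decode(latents / vae.config.scaling_factor).sample
```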
src/diffusers/models/controlnet.py

Lines changed: 2 additions & 2 deletions

@@ -19,7 +19,7 @@
 from torch.nn import functional as F

 from ..configuration_utils import ConfigMixin, register_to_config
-from ..loaders import FromOriginalControlnetMixin
+from ..loaders import FromOriginalControlNetMixin
 from ..utils import BaseOutput, logging
 from .attention_processor import (
     ADDED_KV_ATTENTION_PROCESSORS,
@@ -102,7 +102,7 @@ def forward(self, conditioning):
         return embedding


-class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
+class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin):
     """
     A ControlNet model.