From 606f5945a3a5d8952ba65af6e57aeab579d76c79 Mon Sep 17 00:00:00 2001
From: yiyixuxu
Date: Sat, 6 Jul 2024 01:40:06 +0200
Subject: [PATCH 1/3] support custom sigmas

---
 .../lavender_flow/pipeline_lavender_flow.py   | 19 +++++++--------
 .../scheduling_flow_match_euler_discrete.py   | 24 ++++++++++++-------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
index 05778f1a147c..f38921acb1e9 100644
--- a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
+++ b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
@@ -17,6 +17,8 @@
 import torch
 from transformers import T5Tokenizer, UMT5EncoderModel
 
+import numpy as np
+
 from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, LavenderFlowTransformer2DModel
 from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
@@ -428,6 +430,8 @@ def __call__(
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
 
         # 4. Prepare timesteps
+
+        sigmas = np.linspace(1.0, 1/num_inference_steps,num_inference_steps)
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler, num_inference_steps, device, timesteps, sigmas
         )
@@ -448,20 +452,13 @@ def __call__(
 
         # 6. Denoising loop
         num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
-        dt = 1.0 / num_inference_steps
-        dt = (
-            torch.tensor([dt] * effective_batch_size)
-            .to(self.device)
-            .view([effective_batch_size, *([1] * len(latents.shape[1:]))])
-        )
         with self.progress_bar(total=num_inference_steps) as progress_bar:
-            for i, t in enumerate(range(num_inference_steps, 0, -1)):
+            for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-                t = t / num_inference_steps
                 timestep = (
-                    torch.tensor([t]).expand(latent_model_input.shape[0]).to(latents.device, dtype=latents.dtype)
+                    torch.tensor([t/1000]).expand(latent_model_input.shape[0]).to(latents.device, dtype=latents.dtype)
                 )
 
                 # predict noise model_output
@@ -476,9 +473,9 @@ def __call__(
             if do_classifier_free_guidance:
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-
+
                 # compute the previous noisy sample x_t -> x_t-1
-                latents = (latents - dt * noise_pred).to(latents.dtype)
+                latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
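The pipeline-side change above swaps the hand-rolled Euler update, which stepped by a fixed dt = 1.0 / num_inference_steps, for scheduler.step(). A minimal sketch of the Euler update that step() performs under the flow-matching formulation, with illustrative names rather than the scheduler's exact internals:

    import torch

    def euler_flow_step(
        sample: torch.Tensor, model_output: torch.Tensor, sigma: float, sigma_next: float
    ) -> torch.Tensor:
        # The step size is the difference of consecutive sigmas; it matches the
        # old fixed -1.0 / num_inference_steps only when the schedule is uniform.
        return sample + (sigma_next - sigma) * model_output

Taking the step size from consecutive sigmas rather than a constant is what lets non-uniform custom schedules integrate correctly.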
diff --git a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
index 83ce63981abd..28099c1ae71e 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple, Union, List
 
 import numpy as np
 import torch
@@ -158,7 +158,11 @@ def scale_noise(
 
     def _sigma_to_t(self, sigma):
         return sigma * self.config.num_train_timesteps
 
-    def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+    def set_timesteps(
+        self,
+        num_inference_steps: int = None,
+        device: Union[str, torch.device] = None,
+        sigmas: Optional[List[float]] = None):
         """
         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
@@ -168,17 +172,19 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic
             device (`str` or `torch.device`, *optional*):
                 The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
-        self.num_inference_steps = num_inference_steps
-        timesteps = np.linspace(
-            self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
-        )
+        if sigmas is None:
+            self.num_inference_steps = num_inference_steps
+            timesteps = np.linspace(
+                self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
+            )
 
-        sigmas = timesteps / self.config.num_train_timesteps
-        sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
+            sigmas = timesteps / self.config.num_train_timesteps
+            sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
+
         sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
-
         timesteps = sigmas * self.config.num_train_timesteps
+
         self.timesteps = timesteps.to(device=device)
         self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
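This scheduler change is the core of the feature: with sigmas=None, set_timesteps() keeps its previous behavior (a linspace over the trained timestep range followed by the shift transform); with a caller-supplied schedule, the sigmas are used verbatim and the shift is not applied. A self-contained sketch of the two paths, with stand-in values for scheduler.config:

    import numpy as np

    num_train_timesteps, shift = 1000, 3.0  # stand-ins for scheduler.config values
    sigma_max, sigma_min = 1.0, 1.0 / num_train_timesteps  # assumed schedule endpoints
    num_inference_steps = 4

    # Default path (sigmas=None): linspace in timestep space, then the shift transform.
    timesteps = np.linspace(
        sigma_max * num_train_timesteps, sigma_min * num_train_timesteps, num_inference_steps
    )
    sigmas = timesteps / num_train_timesteps
    sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)
    print(sigmas)  # approx. [1.0, 0.857, 0.601, 0.003]

    # Custom path: the caller's sigmas are taken as-is, so the shift is skipped;
    # callers who want shifted spacing must bake it into their schedule.
    custom_sigmas = np.linspace(1.0, 1.0 / num_inference_steps, num_inference_steps)
    print(custom_sigmas)  # [1.0, 0.75, 0.5, 0.25]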
From 214236145c14ba7243427fbfa83402dda1efaddc Mon Sep 17 00:00:00 2001
From: yiyixuxu
Date: Sat, 6 Jul 2024 01:45:02 +0200
Subject: [PATCH 2/3] style

---
 .../lavender_flow/pipeline_lavender_flow.py   | 12 ++++++------
 .../scheduling_flow_match_euler_discrete.py   | 15 ++++++++-------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
index f38921acb1e9..4c5970430a61 100644
--- a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
+++ b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
@@ -14,11 +14,10 @@
 import inspect
 from typing import Callable, List, Optional, Tuple, Union
 
+import numpy as np
 import torch
 from transformers import T5Tokenizer, UMT5EncoderModel
 
-import numpy as np
-
 from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, LavenderFlowTransformer2DModel
 from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
@@ -431,14 +430,13 @@ def __call__(
 
         # 4. Prepare timesteps
 
-        sigmas = np.linspace(1.0, 1/num_inference_steps,num_inference_steps)
+        sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler, num_inference_steps, device, timesteps, sigmas
         )
 
         # 5. Prepare latents.
         latent_channels = self.transformer.config.in_channels
-        effective_batch_size = batch_size * num_images_per_prompt
         latents = self.prepare_latents(
             batch_size * num_images_per_prompt,
             latent_channels,
@@ -458,7 +456,9 @@ def __call__(
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = (
-                    torch.tensor([t/1000]).expand(latent_model_input.shape[0]).to(latents.device, dtype=latents.dtype)
+                    torch.tensor([t / 1000])
+                    .expand(latent_model_input.shape[0])
+                    .to(latents.device, dtype=latents.dtype)
                 )
 
                 # predict noise model_output
@@ -473,7 +473,7 @@ def __call__(
             if do_classifier_free_guidance:
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-
+
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
 
diff --git a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
index 28099c1ae71e..779e691f0c27 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Optional, Tuple, Union, List
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -159,10 +159,11 @@ def _sigma_to_t(self, sigma):
         return sigma * self.config.num_train_timesteps
 
     def set_timesteps(
-        self,
-        num_inference_steps: int = None,
-        device: Union[str, torch.device] = None,
-        sigmas: Optional[List[float]] = None):
+        self,
+        num_inference_steps: int = None,
+        device: Union[str, torch.device] = None,
+        sigmas: Optional[List[float]] = None,
+    ):
         """
         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
@@ -181,10 +182,10 @@ def set_timesteps(
 
             sigmas = timesteps / self.config.num_train_timesteps
             sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas)
-
+
         sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
         timesteps = sigmas * self.config.num_train_timesteps
-
+
         self.timesteps = timesteps.to(device=device)
         self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
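With the style pass applied, the new argument can be exercised directly. A hedged usage sketch follows, with outputs worked out by hand from the scheduler code above and the default num_train_timesteps of 1000. One caveat visible in the diff: the custom path runs the input through torch.from_numpy(), so despite the Optional[List[float]] annotation a NumPy array is required; a plain Python list would raise a TypeError.

    import numpy as np
    from diffusers import FlowMatchEulerDiscreteScheduler

    scheduler = FlowMatchEulerDiscreteScheduler(shift=3.0)
    custom_sigmas = np.linspace(1.0, 0.25, 4)  # [1.0, 0.75, 0.5, 0.25]
    scheduler.set_timesteps(sigmas=custom_sigmas, device="cpu")

    print(scheduler.timesteps)  # tensor([1000., 750., 500., 250.])
    print(scheduler.sigmas)     # tensor([1.00, 0.75, 0.50, 0.25, 0.00]); terminal zero appended

Note also that the custom path never sets self.num_inference_steps, which may surprise callers that read that attribute back.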
From 89bd27aa8a1ee3d2c3f3fa3f940e44d32ec2b780 Mon Sep 17 00:00:00 2001
From: yiyixuxu
Date: Sun, 7 Jul 2024 20:44:17 +0200
Subject: [PATCH 3/3] apply feedback

---
 .../lavender_flow/pipeline_lavender_flow.py   | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
index 4c5970430a61..ba7a075626c1 100644
--- a/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
+++ b/src/diffusers/pipelines/lavender_flow/pipeline_lavender_flow.py
@@ -14,7 +14,6 @@
 import inspect
 from typing import Callable, List, Optional, Tuple, Union
 
-import numpy as np
 import torch
 from transformers import T5Tokenizer, UMT5EncoderModel
 
@@ -430,7 +429,7 @@ def __call__(
 
         # 4. Prepare timesteps
 
-        sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
+        # sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler, num_inference_steps, device, timesteps, sigmas
         )
@@ -454,12 +453,11 @@ def __call__(
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+                # aura uses timestep values between 0 and 1, with t=1 as noise and t=0 as the image
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-                timestep = (
-                    torch.tensor([t / 1000])
-                    .expand(latent_model_input.shape[0])
-                    .to(latents.device, dtype=latents.dtype)
-                )
+                timestep = torch.tensor([t / 1000]).expand(latent_model_input.shape[0])
+                timestep = timestep.to(latents.device, dtype=latents.dtype)
 
                 # predict noise model_output
                 noise_pred = self.transformer(
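After this last commit the pipeline no longer builds a default schedule itself (the linspace is left commented out), so the sigmas forwarded to retrieve_timesteps() presumably arrive through the pipeline's call arguments and fall back to the scheduler's default schedule when None; the truncated diff does not show that part of the signature. The t / 1000 in the loop maps scheduler timesteps, which are sigmas scaled by num_train_timesteps, back onto the 0-1 range the transformer is conditioned on. A small sketch of that conversion, where 1000 is assumed to equal scheduler.config.num_train_timesteps:

    import torch

    num_train_timesteps = 1000  # assumed to match the hardcoded divisor in the loop
    timesteps = torch.tensor([1000.0, 750.0, 500.0, 250.0])  # sigmas * num_train_timesteps
    batch_size = 2

    for t in timesteps:
        # t / 1000 == 1.0 is pure noise, 0.0 the finished image
        timestep = torch.tensor([float(t) / num_train_timesteps]).expand(batch_size)
        print(timestep)  # tensor([1., 1.]), then tensor([0.7500, 0.7500]), ...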