diff --git a/src/diffusers/schedulers/scheduling_consistency_models.py b/src/diffusers/schedulers/scheduling_consistency_models.py index 23cd3ec134b7..b9a21f9bbd37 100644 --- a/src/diffusers/schedulers/scheduling_consistency_models.py +++ b/src/diffusers/schedulers/scheduling_consistency_models.py @@ -98,6 +98,7 @@ def __init__( self.custom_timesteps = False self.is_scale_input_called = False self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication def index_for_timestep(self, timestep, schedule_timesteps=None): if schedule_timesteps is None: @@ -230,6 +231,7 @@ def set_timesteps( self.timesteps = torch.from_numpy(timesteps).to(device=device) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Modified _convert_to_karras implementation that takes in ramp as argument def _convert_to_karras(self, ramp): diff --git a/src/diffusers/schedulers/scheduling_deis_multistep.py b/src/diffusers/schedulers/scheduling_deis_multistep.py index bd44d2444154..572078a9d604 100644 --- a/src/diffusers/schedulers/scheduling_deis_multistep.py +++ b/src/diffusers/schedulers/scheduling_deis_multistep.py @@ -187,6 +187,7 @@ def __init__( self.model_outputs = [None] * solver_order self.lower_order_nums = 0 self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def step_index(self): @@ -254,6 +255,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic # add an index counter for schedulers that allow duplicated timesteps self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py index 086505c5052b..49c07a504985 100644 --- 
a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -214,6 +214,7 @@ def __init__( self.model_outputs = [None] * solver_order self.lower_order_nums = 0 self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def step_index(self): @@ -290,6 +291,7 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torc # add an index counter for schedulers that allow duplicated timesteps self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py index cfb53c943cea..5d8f3fdf49cd 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py @@ -209,6 +209,7 @@ def __init__( self.model_outputs = [None] * solver_order self.lower_order_nums = 0 self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication self.use_karras_sigmas = use_karras_sigmas @property @@ -289,6 +290,7 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torc # add an index counter for schedulers that allow duplicated timesteps self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py index 12345a26bcf2..a999a8adbfa7 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py +++ 
b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py @@ -198,6 +198,7 @@ def __init__( self.noise_sampler = None self.noise_sampler_seed = noise_sampler_seed self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep def index_for_timestep(self, timestep, schedule_timesteps=None): @@ -347,6 +348,7 @@ def set_timesteps( self.mid_point_sigma = None self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication self.noise_sampler = None # for exp beta schedules, such as the one for `pipeline_shap_e.py` diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py index 7e8149ab55c4..dea033822e14 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -197,6 +197,7 @@ def __init__( self.sample = None self.order_list = self.get_order_list(num_train_timesteps) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication def get_order_list(self, num_inference_steps: int) -> List[int]: """ @@ -288,6 +289,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic # add an index counter for schedulers that allow duplicated timesteps self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 7c0dd803d91b..e476c329455e 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -166,6 
+166,7 @@ def __init__( self.is_scale_input_called = False self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def init_noise_sigma(self): @@ -249,6 +250,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic self.timesteps = torch.from_numpy(timesteps).to(device=device) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index def _init_step_index(self, timestep): diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 802ba0f099f9..c72f7ff336aa 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -237,6 +237,7 @@ def __init__( self.use_karras_sigmas = use_karras_sigmas self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def init_noise_sigma(self): @@ -341,6 +342,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication def _sigma_to_t(self, sigma, log_sigmas): # get log sigma diff --git a/src/diffusers/schedulers/scheduling_heun_discrete.py b/src/diffusers/schedulers/scheduling_heun_discrete.py index 460299cf2ec1..d06459e0a264 100644 --- a/src/diffusers/schedulers/scheduling_heun_discrete.py +++ b/src/diffusers/schedulers/scheduling_heun_discrete.py @@ -148,6 +148,7 @@ def __init__( self.use_karras_sigmas = use_karras_sigmas self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication def index_for_timestep(self, timestep, schedule_timesteps=None): if schedule_timesteps is None: @@ -269,6 +270,7 @@ def set_timesteps( self.dt = None 
self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # (YiYi Notes: keep this for now since we are keeping add_noise function which use index_for_timestep) # for exp beta schedules, such as the one for `pipeline_shap_e.py` diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py index aae5a15abca2..dbf0984ed503 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py @@ -140,6 +140,7 @@ def __init__( # set all values self.set_timesteps(num_train_timesteps, None, num_train_timesteps) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep def index_for_timestep(self, timestep, schedule_timesteps=None): @@ -295,6 +296,7 @@ def set_timesteps( self._index_counter = defaultdict(int) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t def _sigma_to_t(self, sigma, log_sigmas): diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py index 3248520aa9a5..e1e5124d70e5 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py @@ -140,6 +140,7 @@ def __init__( self.set_timesteps(num_train_timesteps, None, num_train_timesteps) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler.index_for_timestep def index_for_timestep(self, timestep, schedule_timesteps=None): @@ -284,6 +285,7 @@ def set_timesteps( self._index_counter = defaultdict(int) 
self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def state_in_first_order(self): diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 90e81c9b3c2c..a78fa0e42639 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -168,6 +168,7 @@ def __init__( self.is_scale_input_called = False self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def init_noise_sigma(self): @@ -279,6 +280,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic self.sigmas = torch.from_numpy(sigmas).to(device=device) self.timesteps = torch.from_numpy(timesteps).to(device=device) self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication self.derivatives = [] diff --git a/src/diffusers/schedulers/scheduling_unipc_multistep.py b/src/diffusers/schedulers/scheduling_unipc_multistep.py index eaa6273e2768..c147e0142a32 100644 --- a/src/diffusers/schedulers/scheduling_unipc_multistep.py +++ b/src/diffusers/schedulers/scheduling_unipc_multistep.py @@ -198,6 +198,7 @@ def __init__( self.solver_p = solver_p self.last_sample = None self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication @property def step_index(self): @@ -268,6 +269,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.devic # add an index counter for schedulers that allow duplicated timesteps self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: