diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py index 5c34efee0970..fbb233dccd7a 100644 --- a/examples/community/clip_guided_stable_diffusion.py +++ b/examples/community/clip_guided_stable_diffusion.py @@ -11,6 +11,7 @@ AutoencoderKL, DDIMScheduler, DiffusionPipeline, + DPMSolverMultistepScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, @@ -63,7 +64,7 @@ def __init__( clip_model: CLIPModel, tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, - scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler], + scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler], feature_extractor: CLIPImageProcessor, ): super().__init__() @@ -125,17 +126,12 @@ def cond_fn( ): latents = latents.detach().requires_grad_() - if isinstance(self.scheduler, LMSDiscreteScheduler): - sigma = self.scheduler.sigmas[index] - # the model input needs to be scaled to match the continuous ODE formulation in K-LMS - latent_model_input = latents / ((sigma**2 + 1) ** 0.5) - else: - latent_model_input = latents + latent_model_input = self.scheduler.scale_model_input(latents, timestep) # predict the noise residual noise_pred = self.unet(latent_model_input, timestep, encoder_hidden_states=text_embeddings).sample - if isinstance(self.scheduler, (PNDMScheduler, DDIMScheduler)): + if isinstance(self.scheduler, (PNDMScheduler, DDIMScheduler, DPMSolverMultistepScheduler)): alpha_prod_t = self.scheduler.alphas_cumprod[timestep] beta_prod_t = 1 - alpha_prod_t # compute predicted original sample from predicted noise also called diff --git a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py index c9d2bc6e5931..c3dee5aa9e9a 100644 --- a/examples/community/clip_guided_stable_diffusion_img2img.py +++ b/examples/community/clip_guided_stable_diffusion_img2img.py @@ -13,6 +13,7 @@ AutoencoderKL, DDIMScheduler, DiffusionPipeline, + DPMSolverMultistepScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, @@ -140,7 +141,7 @@ def __init__( clip_model: CLIPModel, tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, - scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler], + scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler], feature_extractor: CLIPFeatureExtractor, ): super().__init__() @@ -263,17 +264,12 @@ def cond_fn( ): latents = latents.detach().requires_grad_() - if isinstance(self.scheduler, LMSDiscreteScheduler): - sigma = self.scheduler.sigmas[index] - # the model input needs to be scaled to match the continuous ODE formulation in K-LMS - latent_model_input = latents / ((sigma**2 + 1) ** 0.5) - else: - latent_model_input = latents + latent_model_input = self.scheduler.scale_model_input(latents, timestep) # predict the noise residual noise_pred = self.unet(latent_model_input, timestep, encoder_hidden_states=text_embeddings).sample - if isinstance(self.scheduler, (PNDMScheduler, DDIMScheduler)): + if isinstance(self.scheduler, (PNDMScheduler, DDIMScheduler, DPMSolverMultistepScheduler)): alpha_prod_t = self.scheduler.alphas_cumprod[timestep] beta_prod_t = 1 - alpha_prod_t # compute predicted original sample from predicted noise also called