From e5349005d3f895956e6a902568fbe740c06676d0 Mon Sep 17 00:00:00 2001 From: Robert Dargavel Smith Date: Sat, 15 Apr 2023 12:54:08 +0100 Subject: [PATCH 1/2] config fixes --- .../pipelines/audio_diffusion/pipeline_audio_diffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py index 1df76ed6c52c..d6800c516ad4 100644 --- a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py +++ b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py @@ -234,7 +234,7 @@ def encode(self, images: List[Image.Image], steps: int = 50) -> np.ndarray: sample = torch.Tensor(sample).to(self.device) for t in self.progress_bar(torch.flip(self.scheduler.timesteps, (0,))): - prev_timestep = t - self.scheduler.num_train_timesteps // self.scheduler.num_inference_steps + prev_timestep = t - self.scheduler.config.num_train_timesteps // self.scheduler.num_inference_steps alpha_prod_t = self.scheduler.alphas_cumprod[t] alpha_prod_t_prev = ( self.scheduler.alphas_cumprod[prev_timestep] From 3f24e15e117e001859e7ead999774bb2542f73bd Mon Sep 17 00:00:00 2001 From: Robert Dargavel Smith Date: Sat, 22 Apr 2023 09:42:49 +0100 Subject: [PATCH 2/2] deprecate get_input_dims --- .../audio_diffusion/pipeline_audio_diffusion.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py index d6800c516ad4..629a2e7d32ca 100644 --- a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py +++ b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py @@ -51,21 +51,6 @@ def __init__( super().__init__() self.register_modules(unet=unet, scheduler=scheduler, mel=mel, vqvae=vqvae) - def get_input_dims(self) -> Tuple: - """Returns dimension of input image - - Returns: - `Tuple`: (height, width) - """ - input_module = self.vqvae if self.vqvae is not None else self.unet - # For backwards compatibility - sample_size = ( - (input_module.config.sample_size, input_module.config.sample_size) - if type(input_module.config.sample_size) == int - else input_module.config.sample_size - ) - return sample_size - def get_default_steps(self) -> int: """Returns default number of steps recommended for inference @@ -123,8 +108,6 @@ def __call__( # For backwards compatibility if type(self.unet.config.sample_size) == int: self.unet.config.sample_size = (self.unet.config.sample_size, self.unet.config.sample_size) - input_dims = self.get_input_dims() - self.mel.set_resolution(x_res=input_dims[1], y_res=input_dims[0]) if noise is None: noise = randn_tensor( (