From 130be1d24c343267aa2c5ed69f6eb23aeffdec4e Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Thu, 11 Jan 2024 23:46:37 +0800
Subject: [PATCH 01/17] move model helper functions in pipelines to EfficiencyMixin

---
 src/diffusers/__init__.py                 |   2 +
 src/diffusers/pipelines/__init__.py       |   2 +
 src/diffusers/pipelines/pipeline_utils.py | 120 ++++++++++++++++++
 .../pipeline_stable_diffusion.py          | 117 -----------------
 4 files changed, 124 insertions(+), 117 deletions(-)

diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index b879f7d3536d..5e276845a85b 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -121,6 +121,7 @@
         "DDPMPipeline",
         "DiffusionPipeline",
         "DiTPipeline",
+        "EfficiencyMixin",
         "ImagePipelineOutput",
         "KarrasVePipeline",
         "LDMPipeline",
@@ -505,6 +506,7 @@
         DDPMPipeline,
         DiffusionPipeline,
         DiTPipeline,
+        EfficiencyMixin,
         ImagePipelineOutput,
         KarrasVePipeline,
         LDMPipeline,
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
index 1bf41aeaf0df..8ccade3aa228 100644
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -48,6 +48,7 @@
 _import_structure["pipeline_utils"] = [
     "AudioPipelineOutput",
     "DiffusionPipeline",
+    "EfficiencyMixin",
     "ImagePipelineOutput",
 ]
 _import_structure["deprecated"].extend(
@@ -328,6 +329,7 @@
     from .pipeline_utils import (
         AudioPipelineOutput,
         DiffusionPipeline,
+        EfficiencyMixin,
         ImagePipelineOutput,
     )
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py
index 18a4b5cb346b..c2947e3f8dfe 100644
--- a/src/diffusers/pipelines/pipeline_utils.py
+++ b/src/diffusers/pipelines/pipeline_utils.py
@@ -42,6 +42,8 @@
 from .. import __version__
 from ..configuration_utils import ConfigMixin
+from ..models import AutoencoderKL
+from ..models.attention_processor import FusedAttnProcessor2_0
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
 from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
 from ..utils import (
@@ -2094,3 +2096,121 @@ def set_attention_slice(self, slice_size: Optional[int]):
 
         for module in modules:
             module.set_attention_slice(slice_size)
+
+class EfficiencyMixin:
+    r"""
+    Helper mixin for `DiffusionPipeline`s that expose a `vae` and a `unet` (mainly the Stable Diffusion family).
+    """
+    def enable_vae_slicing(self):
+        r"""
+        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+        """
+        self.vae.enable_slicing()
+
+    def disable_vae_slicing(self):
+        r"""
+        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+        computing decoding in one step.
+        """
+        self.vae.disable_slicing()
+
+    def enable_vae_tiling(self):
+        r"""
+        Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to
+        allow processing larger images.
+        """
+        self.vae.enable_tiling()
+
+    def disable_vae_tiling(self):
+        r"""
+        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+        computing decoding in one step.
+        """
+        self.vae.disable_tiling()
+
+    def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
+        r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
+
+        The suffixes after the scaling factors represent the stages where they are being applied.
+
+        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
+        that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
+
+        Args:
+            s1 (`float`):
+                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
+                mitigate "oversmoothing effect" in the enhanced denoising process.
+            s2 (`float`):
+                Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
+                mitigate "oversmoothing effect" in the enhanced denoising process.
+            b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
+            b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
+        """
+        if not hasattr(self, "unet"):
+            raise ValueError("The pipeline must have `unet` for using FreeU.")
+        self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
+
+    def disable_freeu(self):
+        """Disables the FreeU mechanism if enabled."""
+        self.unet.disable_freeu()
+
+    def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+        key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+        """
+        self.fusing_unet = False
+        self.fusing_vae = False
+
+        if unet:
+            self.fusing_unet = True
+            self.unet.fuse_qkv_projections()
+            self.unet.set_attn_processor(FusedAttnProcessor2_0())
+
+        if vae:
+            if not isinstance(self.vae, AutoencoderKL):
+                raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
+
+            self.fusing_vae = True
+            self.vae.fuse_qkv_projections()
+            self.vae.set_attn_processor(FusedAttnProcessor2_0())
+
+    def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """Disable QKV projection fusion if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+
+        """
+        if unet:
+            if not self.fusing_unet:
+                logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.unet.unfuse_qkv_projections()
+                self.fusing_unet = False
+
+        if vae:
+            if not self.fusing_vae:
+                logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.vae.unfuse_qkv_projections()
+                self.fusing_vae = False
\ No newline at end of file
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index a62b050afe92..5249e897ce85 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -23,7 +23,6 @@
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
-from ...models.attention_processor import FusedAttnProcessor2_0
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
@@ -248,35 +247,6 @@ def __init__(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
 
-    def enable_vae_slicing(self):
-        r"""
-        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
-        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
-        """
-        self.vae.enable_slicing()
-
-    def disable_vae_slicing(self):
-        r"""
-        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_slicing()
-
-    def enable_vae_tiling(self):
-        r"""
-        Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
-        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to
-        allow processing larger images.
-        """
-        self.vae.enable_tiling()
-
-    def disable_vae_tiling(self):
-        r"""
-        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
-        computing decoding in one step.
-        """
-        self.vae.disable_tiling()
-
     def _encode_prompt(
         self,
         prompt,
@@ -666,93 +636,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
         latents = latents * self.scheduler.init_noise_sigma
         return latents
 
-    def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
-        r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
-        The suffixes after the scaling factors represent the stages where they are being applied.
-
-        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
-        that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
-        Args:
-            s1 (`float`):
-                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            s2 (`float`):
-                Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
-                mitigate "oversmoothing effect" in the enhanced denoising process.
-            b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
-            b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ From ec74982b7ed08ed75443576d4642ae6b4eade78c Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Fri, 12 Jan 2024 01:31:07 +0800 Subject: [PATCH 02/17] deduplicate functions replaced by EfficiencyMixin --- ...p_guided_images_mixing_stable_diffusion.py | 10 -- .../community/clip_guided_stable_diffusion.py | 10 -- .../clip_guided_stable_diffusion_img2img.py | 10 -- .../community/composable_stable_diffusion.py | 61 +------- examples/community/gluegen.py | 58 +------- examples/community/imagic_stable_diffusion.py | 25 ---- examples/community/img2img_inpainting.py | 27 ---- .../community/interpolate_stable_diffusion.py | 27 ---- .../latent_consistency_interpolate.py | 65 +-------- examples/community/llm_grounded_diffusion.py | 69 +-------- examples/community/lpw_stable_diffusion.py | 111 +-------------- examples/community/lpw_stable_diffusion_xl.py | 131 +----------------- .../multilingual_stable_diffusion.py | 27 ---- .../pipeline_animatediff_controlnet.py | 67 +-------- .../community/pipeline_demofusion_sdxl.py | 39 +----- .../community/pipeline_sdxl_style_aligned.py | 125 +---------------- ..._stable_diffusion_xl_controlnet_adapter.py | 69 +-------- ...diffusion_xl_controlnet_adapter_inpaint.py | 66 +-------- examples/community/pipeline_zero1to3.py | 111 +-------------- examples/community/sd_text2img_k_diffusion.py | 68 +-------- .../community/seed_resize_stable_diffusion.py | 30 +--- .../community/speech_to_image_diffusion.py | 11 +- .../community/stable_diffusion_comparison.py | 28 +--- .../stable_diffusion_controlnet_img2img.py | 85 ------------ .../stable_diffusion_controlnet_inpaint.py | 85 ------------ ...le_diffusion_controlnet_inpaint_img2img.py | 85 ------------ examples/community/stable_diffusion_ipex.py | 109 +-------------- examples/community/stable_diffusion_mega.py | 30 +--- .../community/stable_diffusion_repaint.py | 79 +---------- examples/community/text_inpainting.py | 68 +-------- .../community/unclip_image_interpolation.py | 46 +----- .../community/unclip_text_interpolation.py | 47 +------ .../controlnetxs/pipeline_controlnet_xs.py | 65 +-------- .../pipeline_controlnet_xs_sd_xl.py | 69 +-------- .../research_projects/rdm/pipeline_rdm.py | 119 +--------------- .../animatediff/pipeline_animatediff.py | 66 +-------- .../pipelines/audioldm/pipeline_audioldm.py | 20 +-- .../pipelines/audioldm2/pipeline_audioldm2.py | 16 --- .../controlnet/pipeline_controlnet.py | 70 +--------- .../controlnet/pipeline_controlnet_img2img.py | 70 +--------- .../controlnet/pipeline_controlnet_inpaint.py | 70 +--------- .../pipeline_controlnet_inpaint_sd_xl.py | 65 +-------- .../controlnet/pipeline_controlnet_sd_xl.py | 64 +-------- .../pipeline_controlnet_sd_xl_img2img.py | 65 +-------- .../alt_diffusion/pipeline_alt_diffusion.py | 124 +---------------- .../pipeline_alt_diffusion_img2img.py | 95 +------------ ...pipeline_stable_diffusion_model_editing.py | 22 +-- .../pipeline_stable_diffusion_paradigms.py | 37 +---- .../pipeline_latent_consistency_img2img.py | 70 +--------- .../pipeline_latent_consistency_text2img.py | 70 +--------- .../pipelines/musicldm/pipeline_musicldm.py | 20 +-- .../pipeline_paint_by_example.py | 4 +- 
src/diffusers/pipelines/pipeline_utils.py | 4 +- .../pipeline_semantic_stable_diffusion.py | 4 +- .../pipeline_stable_diffusion.py | 4 +- ...peline_stable_diffusion_image_variation.py | 32 +---- .../pipeline_stable_diffusion_img2img.py | 99 +------------ .../pipeline_stable_diffusion_inpaint.py | 99 +------------ ...eline_stable_diffusion_instruct_pix2pix.py | 32 +---- ...ipeline_stable_diffusion_latent_upscale.py | 32 +---- .../pipeline_stable_diffusion_upscale.py | 32 +---- .../pipeline_stable_unclip.py | 20 +-- .../pipeline_stable_unclip_img2img.py | 20 +-- ...line_stable_diffusion_attend_and_excite.py | 20 +-- .../pipeline_stable_diffusion_diffedit.py | 39 +----- .../pipeline_stable_diffusion_gligen.py | 33 +---- ...line_stable_diffusion_gligen_text_image.py | 33 +---- .../pipeline_stable_diffusion_k_diffusion.py | 4 +- ...ipeline_stable_diffusion_xl_k_diffusion.py | 124 +---------------- .../pipeline_stable_diffusion_ldm3d.py | 42 +----- .../pipeline_stable_diffusion_panorama.py | 22 +-- .../pipeline_stable_diffusion_safe.py | 4 +- .../pipeline_stable_diffusion_sag.py | 20 +-- .../pipeline_stable_diffusion_xl.py | 123 +--------------- .../pipeline_stable_diffusion_xl_img2img.py | 126 +---------------- .../pipeline_stable_diffusion_xl_inpaint.py | 126 +---------------- ...ne_stable_diffusion_xl_instruct_pix2pix.py | 68 +-------- .../pipeline_stable_diffusion_adapter.py | 48 +------ .../pipeline_stable_diffusion_xl_adapter.py | 64 +-------- .../pipeline_text_to_video_synth.py | 65 +-------- .../pipeline_text_to_video_synth_img2img.py | 65 +-------- .../pipeline_text_to_video_zero.py | 4 +- .../pipeline_text_to_video_zero_sdxl.py | 19 +-- .../unidiffuser/pipeline_unidiffuser.py | 37 +---- 84 files changed, 236 insertions(+), 4308 deletions(-) diff --git a/examples/community/clip_guided_images_mixing_stable_diffusion.py b/examples/community/clip_guided_images_mixing_stable_diffusion.py index 399f5b14506d..6fcbb16963b8 100644 --- a/examples/community/clip_guided_images_mixing_stable_diffusion.py +++ b/examples/community/clip_guided_images_mixing_stable_diffusion.py @@ -113,16 +113,6 @@ def __init__( set_requires_grad(self.text_encoder, False) set_requires_grad(self.clip_model, False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - self.enable_attention_slicing(None) - def freeze_vae(self): set_requires_grad(self.vae, False) diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py index 3f4ab2ab9f4a..9065462940c2 100644 --- a/examples/community/clip_guided_stable_diffusion.py +++ b/examples/community/clip_guided_stable_diffusion.py @@ -89,16 +89,6 @@ def __init__( set_requires_grad(self.text_encoder, False) set_requires_grad(self.clip_model, False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - self.enable_attention_slicing(None) - def freeze_vae(self): set_requires_grad(self.vae, False) diff --git 
a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py index 2dbc9bef9ffe..83e117f02dd2 100644 --- a/examples/community/clip_guided_stable_diffusion_img2img.py +++ b/examples/community/clip_guided_stable_diffusion_img2img.py @@ -163,16 +163,6 @@ def __init__( set_requires_grad(self.text_encoder, False) set_requires_grad(self.clip_model, False) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - self.enable_attention_slicing(None) - def freeze_vae(self): set_requires_grad(self.vae, False) diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index 2693ae45afac..eb099f9398b9 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -22,6 +22,7 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import ( @@ -32,13 +33,13 @@ LMSDiscreteScheduler, PNDMScheduler, ) -from diffusers.utils import deprecate, is_accelerate_available, logging +from diffusers.utils import deprecate, logging logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class ComposableStableDiffusionPipeline(DiffusionPipeline): +class ComposableStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -164,62 +165,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. 
- """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - # TODO(Patrick) - there is currently a bug with cpu offload of nn.Parameter in accelerate - # fix by only offloading self.safety_checker for now - cpu_offload(self.safety_checker.vision_model, device) - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): r""" Encodes the prompt into text encoder hidden states. diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py index ecfe91eb9483..19cbf6cb3b82 100644 --- a/examples/community/gluegen.py +++ b/examples/community/gluegen.py @@ -10,6 +10,7 @@ from diffusers.loaders import LoraLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -193,7 +194,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class GlueGenStableDiffusionPipeline(DiffusionPipeline, LoraLoaderMixin): +class GlueGenStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, LoraLoaderMixin): def __init__( self, vae: AutoencoderKL, @@ -241,35 +242,6 @@ def load_language_adapter( ) self.language_adapter.load_state_dict(torch.load(model_path)) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - def _adapt_language(self, prompt_embeds: torch.FloatTensor): prompt_embeds = prompt_embeds / 3 prompt_embeds = self.language_adapter(prompt_embeds) * (self.tensor_norm / 2) @@ -544,32 +516,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py index acd09c7e0bf4..0f744c1557fd 100644 --- a/examples/community/imagic_stable_diffusion.py +++ b/examples/community/imagic_stable_diffusion.py @@ -105,31 +105,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. 
- """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - def train( self, prompt: Union[str, List[str]], diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py index 8ee8355d49a6..71dc3cf712ed 100644 --- a/examples/community/img2img_inpainting.py +++ b/examples/community/img2img_inpainting.py @@ -129,33 +129,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def __call__( self, diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py index 70e4d025a037..4c13e0046b9a 100644 --- a/examples/community/interpolate_stable_diffusion.py +++ b/examples/community/interpolate_stable_diffusion.py @@ -120,33 +120,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. 
- """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def __call__( self, diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py index 7b9e4806bf44..a70fb6a467f5 100644 --- a/examples/community/latent_consistency_interpolate.py +++ b/examples/community/latent_consistency_interpolate.py @@ -9,7 +9,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import LCMScheduler from diffusers.utils import ( @@ -190,7 +190,7 @@ def slerp( class LatentConsistencyModelWalkPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a latent consistency model. @@ -273,67 +273,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. 
- - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt def encode_prompt( self, diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py index d815b4ea8e42..39d530e09b1c 100644 --- a/examples/community/llm_grounded_diffusion.py +++ b/examples/community/llm_grounded_diffusion.py @@ -35,6 +35,7 @@ from diffusers.models.attention_processor import AttnProcessor2_0 from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -267,7 +268,12 @@ def __call__( class LLMGroundedDiffusionPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for layout-grounded text-to-image generation using LLM-grounded Diffusion (LMD+): https://arxiv.org/pdf/2305.13655.pdf. @@ -1180,39 +1186,6 @@ def latent_lmd_guidance( # Below are methods copied from StableDiffusionPipeline # The design choice of not inheriting from StableDiffusionPipeline is discussed here: https://github.com/huggingface/diffusers/pull/5993#issuecomment-1834258517 - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -1522,34 +1495,6 @@ def prepare_latents( latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index 7249e033186f..debaef7d4642 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -13,13 +13,12 @@ from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( PIL_INTERPOLATION, deprecate, - is_accelerate_available, - is_accelerate_version, logging, ) from diffusers.utils.torch_utils import randn_tensor @@ -410,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8): class StableDiffusionLongPromptWeightingPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing @@ -534,112 +533,6 @@ def __init__( requires_safety_checker=requires_safety_checker, ) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. - - When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in - several steps. This is useful to save a large amount of memory and to allow the processing of larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"): - from accelerate import cpu_offload - else: - raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. 
- """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py index 479c76bbdc56..83f0e8b7818d 100644 --- a/examples/community/lpw_stable_diffusion_xl.py +++ b/examples/community/lpw_stable_diffusion_xl.py @@ -26,11 +26,11 @@ from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel from diffusers.models.attention_processor import ( AttnProcessor2_0, - FusedAttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -545,7 +545,12 @@ def retrieve_timesteps( class SDXLLongPromptWeightingPipeline( - DiffusionPipeline, FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin + DiffusionPipeline, + EfficiencyMixin, + FromSingleFileMixin, + IPAdapterMixin, + LoraLoaderMixin, + TextualInversionLoaderMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. @@ -649,39 +654,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def enable_model_cpu_offload(self, gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared @@ -1030,95 +1002,6 @@ def check_inputs( "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`." 
) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None): # get the original timestep using init_timestep if denoising_start is None: diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py index 7597efd215af..0a3b49a14d7d 100644 --- a/examples/community/multilingual_stable_diffusion.py +++ b/examples/community/multilingual_stable_diffusion.py @@ -135,33 +135,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def __call__( self, diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index 1285e7c97a9b..dc7b6302c5ea 100644 --- a/examples/community/pipeline_animatediff_controlnet.py +++ b/examples/community/pipeline_animatediff_controlnet.py @@ -28,7 +28,7 @@ from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unets.unet_motion_model import MotionAdapter from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -111,7 +111,9 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput): frames: Union[torch.Tensor, np.ndarray] -class AnimateDiffControlNetPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin): +class AnimateDiffControlNetPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin +): r""" Pipeline for text-to-video generation. @@ -406,67 +408,6 @@ def decode_latents(self, latents): video = video.float() return video - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py index ab0d3cf9dd29..69624271beed 100644 --- a/examples/community/pipeline_demofusion_sdxl.py +++ b/examples/community/pipeline_demofusion_sdxl.py @@ -23,7 +23,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( is_accelerate_available, @@ -93,7 +93,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): return noise_cfg -class DemoFusionSDXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin): +class DemoFusionSDXLPipeline( + DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin +): r""" Pipeline for text-to-image generation using Stable Diffusion XL. @@ -176,39 +178,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - def encode_prompt( self, prompt: str, diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py index fa54b542c5ca..2b6047e97bfb 100644 --- a/examples/community/pipeline_sdxl_style_aligned.py +++ b/examples/community/pipeline_sdxl_style_aligned.py @@ -51,7 +51,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -85,7 +85,7 @@ >>> from typing import List >>> import torch - >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline + >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,EfficiencyMixin >>> from PIL import Image >>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo" @@ -389,6 +389,7 @@ def retrieve_latents( class StyleAlignedSDXLPipeline( DiffusionPipeline, + EfficiencyMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, @@ -504,39 +505,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def encode_prompt( self, prompt: str, @@ -1187,34 +1155,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. 
- - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - def _enable_shared_attention_processors( self, share_attention: bool, @@ -1361,65 +1301,6 @@ def disable_style_aligned(self): self._style_aligned_norm_layers = None self._disable_shared_attention_processors() - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py index e1437bee7a15..490fde58b916 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py @@ -33,7 +33,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -158,7 +158,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetAdapterPipeline( - DiffusionPipeline, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin + DiffusionPipeline, + EfficiencyMixin, + FromSingleFileMixin, + StableDiffusionXLLoraLoaderMixin, + TextualInversionLoaderMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter @@ -234,39 +238,6 @@ def __init__( ) self.default_sample_size = self.unet.config.sample_size - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -863,34 +834,6 @@ def _default_height_width(self, height, width, image): return height, width - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - def prepare_control_image( self, image, diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py index e2a5fec29faf..17777090df24 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py @@ -52,6 +52,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -303,7 +304,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): return noise_cfg -class StableDiffusionXLControlNetAdapterInpaintPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin): +class StableDiffusionXLControlNetAdapterInpaintPipeline( + DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin +): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter https://arxiv.org/abs/2302.08453 @@ -383,39 +386,6 @@ def __init__( ) self.default_sample_size = self.unet.config.sample_size - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. 
This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -1207,34 +1177,6 @@ def _default_height_width(self, height, width, image): return height, width - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - def prepare_control_image( self, image, diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py index 600cf2dc1b63..6e1c1d015e48 100644 --- a/examples/community/pipeline_zero1to3.py +++ b/examples/community/pipeline_zero1to3.py @@ -22,18 +22,16 @@ # randn_tensor, # replace_example_docstring, # ) -# from ..pipeline_utils import DiffusionPipeline +# from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin # from . 
import StableDiffusionPipelineOutput # from .safety_checker import StableDiffusionSafetyChecker -from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel +from diffusers import AutoencoderKL, DiffusionPipeline, EfficiencyMixin, UNet2DConditionModel from diffusers.configuration_utils import ConfigMixin, FrozenDict from diffusers.models.modeling_utils import ModelMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( deprecate, - is_accelerate_available, - is_accelerate_version, logging, replace_example_docstring, ) @@ -68,7 +66,7 @@ def forward(self, x): return self.projection(x) -class Zero1to3StableDiffusionPipeline(DiffusionPipeline): +class Zero1to3StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for single view conditioned novel view generation using Zero1to3. @@ -187,109 +185,6 @@ def __init__( self.register_to_config(requires_safety_checker=requires_safety_checker) # self.model_mode = None - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. - - When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in - several steps. This is useful to save a large amount of memory and to allow the processing of larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"): - from accelerate import cpu_offload - else: - raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py index c6a4bf2ce613..8928eb383b76 100755 --- a/examples/community/sd_text2img_k_diffusion.py +++ b/examples/community/sd_text2img_k_diffusion.py @@ -19,9 +19,9 @@ import torch from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser -from diffusers import DiffusionPipeline, LMSDiscreteScheduler +from diffusers import DiffusionPipeline, EfficiencyMixin, LMSDiscreteScheduler from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from diffusers.utils import is_accelerate_available, logging +from diffusers.utils import logging logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs): return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample -class StableDiffusionPipeline(DiffusionPipeline): +class StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -120,68 +120,6 @@ def set_scheduler(self, scheduler_type: str): sampling = getattr(library, "sampling") self.sampler = getattr(sampling, scheduler_type) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. 
If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): r""" Encodes the prompt into text encoder hidden states. diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py index 9318277b8f01..c84c222f2360 100644 --- a/examples/community/seed_resize_stable_diffusion.py +++ b/examples/community/seed_resize_stable_diffusion.py @@ -9,6 +9,7 @@ from diffusers import DiffusionPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -18,7 +19,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SeedResizeStableDiffusionPipeline(DiffusionPipeline): +class SeedResizeStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -67,33 +68,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. 
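Attention slicing, by contrast, is presumably dropped here rather than moved into the mixin because `DiffusionPipeline` already provides it. Usage sketch with the same `pipe`:

# "auto" halves attention_head_dim; an integer slice_size must divide it.
pipe.enable_attention_slicing("auto")
image = pipe("a watercolor of a koi pond").images[0]

# Restores single-step attention (internally passes slice_size=None).
pipe.disable_attention_slicing()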
- - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def __call__( self, diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py index 63bcfb662517..7f0bf4bf0293 100644 --- a/examples/community/speech_to_image_diffusion.py +++ b/examples/community/speech_to_image_diffusion.py @@ -18,6 +18,7 @@ PNDMScheduler, UNet2DConditionModel, ) +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import logging @@ -26,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SpeechToImagePipeline(DiffusionPipeline): +class SpeechToImagePipeline(DiffusionPipeline, EfficiencyMixin): def __init__( self, speech_model: WhisperForConditionalGeneration, @@ -62,14 +63,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - if slice_size == "auto": - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - self.enable_attention_slicing(None) - @torch.no_grad() def __call__( self, diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py index 7997a0cc0186..49c9ffa86a61 100644 --- a/examples/community/stable_diffusion_comparison.py +++ b/examples/community/stable_diffusion_comparison.py @@ -12,6 +12,7 @@ StableDiffusionPipeline, UNet2DConditionModel, ) +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -22,7 +23,7 @@ pipe4_model_id = "CompVis/stable-diffusion-v1-4" -class StableDiffusionComparisonPipeline(DiffusionPipeline): +class StableDiffusionComparisonPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for parallel comparison of Stable Diffusion v1-v4 This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for @@ -83,31 +84,6 @@ def __init__( def layers(self) -> Dict[str, Any]: return {k: getattr(self, k) for k in self.config.keys() if not k.startswith("_")} - def enable_attention_slicing(self, slice_size: Optional[Union[str, 
int]] = "auto"): - r""" - Enable sliced attention computation. - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def text2img_sd1_1( self, diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py index a2b92fff0fb5..f961c767e416 100644 --- a/examples/community/stable_diffusion_controlnet_img2img.py +++ b/examples/community/stable_diffusion_controlnet_img2img.py @@ -14,8 +14,6 @@ from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( PIL_INTERPOLATION, - is_accelerate_available, - is_accelerate_version, replace_example_docstring, ) from diffusers.utils.torch_utils import randn_tensor @@ -183,89 +181,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. 
- """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - # the safety checker can offload the vae again - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # control net hook has be manually offloaded as it alternates with unet - cpu_offload_with_hook(self.controlnet, device) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/stable_diffusion_controlnet_inpaint.py b/examples/community/stable_diffusion_controlnet_inpaint.py index b87973366418..76e6e331abcb 100644 --- a/examples/community/stable_diffusion_controlnet_inpaint.py +++ b/examples/community/stable_diffusion_controlnet_inpaint.py @@ -15,8 +15,6 @@ from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( PIL_INTERPOLATION, - is_accelerate_available, - is_accelerate_version, replace_example_docstring, ) from diffusers.utils.torch_utils import randn_tensor @@ -282,89 +280,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. 
If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - # the safety checker can offload the vae again - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # control net hook has be manually offloaded as it alternates with unet - cpu_offload_with_hook(self.controlnet, device) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. 
- """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py index 96ad3c39239d..34b8170f66c8 100644 --- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py +++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py @@ -14,8 +14,6 @@ from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( PIL_INTERPOLATION, - is_accelerate_available, - is_accelerate_version, replace_example_docstring, ) from diffusers.utils.torch_utils import randn_tensor @@ -267,89 +265,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae, controlnet, and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.controlnet]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. 
- """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - # the safety checker can offload the vae again - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # control net hook has be manually offloaded as it alternates with unet - cpu_offload_with_hook(self.controlnet, device) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py index bf58cc8453a1..48048d5831f4 100644 --- a/examples/community/stable_diffusion_ipex.py +++ b/examples/community/stable_diffusion_ipex.py @@ -23,14 +23,12 @@ from diffusers.configuration_utils import FrozenDict from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( deprecate, - is_accelerate_available, - is_accelerate_version, logging, replace_example_docstring, ) @@ -62,7 +60,7 @@ """ -class StableDiffusionIPEXPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionIPEXPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-to-image generation using Stable Diffusion on IPEX. @@ -304,109 +302,6 @@ def prepare_for_ipex(self, promt, dtype=torch.float32, height=None, width=None, ave_decoder_trace_model = torch.jit.freeze(ave_decoder_trace_model) self.vae.decoder.forward = ave_decoder_trace_model.forward - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. 
- - When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in - several steps. This is useful to save a large amount of memory and to allow the processing of larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"): - from accelerate import cpu_offload - else: - raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. 
- """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt( self, prompt, diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py index faed00b49d40..470cbab5a527 100644 --- a/examples/community/stable_diffusion_mega.py +++ b/examples/community/stable_diffusion_mega.py @@ -16,6 +16,7 @@ UNet2DConditionModel, ) from diffusers.configuration_utils import FrozenDict +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import deprecate, logging @@ -23,7 +24,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionMegaPipeline(DiffusionPipeline): +class StableDiffusionMegaPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -94,33 +95,6 @@ def __init__( def components(self) -> Dict[str, Any]: return {k: getattr(self, k) for k in self.config.keys() if not k.startswith("_")} - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. 
- """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - @torch.no_grad() def inpaint( self, diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py index db2de0897570..38400b578f97 100644 --- a/examples/community/stable_diffusion_repaint.py +++ b/examples/community/stable_diffusion_repaint.py @@ -24,14 +24,13 @@ from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel from diffusers.configuration_utils import FrozenDict, deprecate from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import ( StableDiffusionSafetyChecker, ) from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( - is_accelerate_available, - is_accelerate_version, logging, ) from diffusers.utils.torch_utils import randn_tensor @@ -140,7 +139,7 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class StableDiffusionRepaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionRepaintPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the @@ -276,80 +275,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - Note that offloading happens on a submodule basis. Memory savings are higher than with - `enable_model_cpu_offload`, but performance is lower. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"): - from accelerate import cpu_offload - else: - raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]: - cpu_offload(cpu_offloaded_model, device) - - if self.safety_checker is not None: - cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload - def enable_model_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. 
Compared - to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` - method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with - `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. - """ - if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): - from accelerate import cpu_offload_with_hook - else: - raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") - - device = torch.device(f"cuda:{gpu_id}") - - if self.device.type != "cpu": - self.to("cpu", silence_dtype_warnings=True) - torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist) - - hook = None - for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - if self.safety_checker is not None: - _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook) - - # We'll offload the last model manually. - self.final_offload_hook = hook - - @property - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py index cd02049a4afb..80889d7897bd 100644 --- a/examples/community/text_inpainting.py +++ b/examples/community/text_inpainting.py @@ -13,16 +13,17 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler -from diffusers.utils import deprecate, is_accelerate_available, logging +from diffusers.utils import deprecate, logging logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class TextInpainting(DiffusionPipeline): +class TextInpainting(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text based inpainting using Stable Diffusion. Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask @@ -120,69 +121,6 @@ def __init__( feature_extractor=feature_extractor, ) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. 
This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - - def enable_sequential_cpu_offload(self): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device("cuda") - - for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - @property - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - @torch.no_grad() def __call__( self, diff --git a/examples/community/unclip_image_interpolation.py b/examples/community/unclip_image_interpolation.py index 95548b152c07..e3bb44e5030b 100644 --- a/examples/community/unclip_image_interpolation.py +++ b/examples/community/unclip_image_interpolation.py @@ -19,7 +19,7 @@ UNet2DModel, ) from diffusers.pipelines.unclip import UnCLIPTextProjModel -from diffusers.utils import is_accelerate_available, logging +from diffusers.utils import logging from diffusers.utils.torch_utils import randn_tensor @@ -204,50 +204,6 @@ def _encode_image(self, image, device, num_images_per_prompt, image_embeddings: return image_embeddings - # Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, the pipeline's - models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only - when their specific submodule has its `forward` method called. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - models = [ - self.decoder, - self.text_proj, - self.text_encoder, - self.super_res_first, - self.super_res_last, - ] - for cpu_offloaded_model in models: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - @property - # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._execution_device - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if self.device != torch.device("meta") or not hasattr(self.decoder, "_hf_hook"): - return self.device - for module in self.decoder.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - @torch.no_grad() def __call__( self, diff --git a/examples/community/unclip_text_interpolation.py b/examples/community/unclip_text_interpolation.py index 764299433b4c..be6a0858b35e 100644 --- a/examples/community/unclip_text_interpolation.py +++ b/examples/community/unclip_text_interpolation.py @@ -15,7 +15,7 @@ UNet2DModel, ) from diffusers.pipelines.unclip import UnCLIPTextProjModel -from diffusers.utils import is_accelerate_available, logging +from diffusers.utils import logging from diffusers.utils.torch_utils import randn_tensor @@ -212,51 +212,6 @@ def _encode_prompt( return prompt_embeds, text_encoder_hidden_states, text_mask - # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.enable_sequential_cpu_offload - def enable_sequential_cpu_offload(self, gpu_id=0): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, the pipeline's - models have their state dicts saved to CPU and then are moved to a `torch.device('meta') and loaded to GPU only - when their specific submodule has its `forward` method called. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device(f"cuda:{gpu_id}") - - # TODO: self.prior.post_process_latents is not covered by the offload hooks, so it fails if added to the list - models = [ - self.decoder, - self.text_proj, - self.text_encoder, - self.super_res_first, - self.super_res_last, - ] - for cpu_offloaded_model in models: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - @property - # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._execution_device - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. 
- """ - if self.device != torch.device("meta") or not hasattr(self.decoder, "_hf_hook"): - return self.device - for module in self.decoder.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - @torch.no_grad() def __call__( self, diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py index 32646c7c7715..63461fadfc14 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py @@ -26,7 +26,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -44,7 +44,7 @@ class StableDiffusionControlNetXSPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance. @@ -139,39 +139,6 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -596,34 +563,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() def __call__( self, diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py index b9b390f1c00c..bb2e6ad1dff7 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py @@ -31,7 +31,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -52,7 +52,11 @@ class StableDiffusionXLControlNetXSPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + StableDiffusionXLLoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion XL with ControlNet-XS guidance. @@ -145,39 +149,6 @@ def __init__( self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. 
This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -661,34 +632,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
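Since this Args block recurs verbatim in every deleted copy, one usage sketch covers them all; the scaling values below are commonly cited Stable Diffusion v1.5 settings from the FreeU repository, not something this patch specifies:

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# s1/s2 attenuate the skip connections, b1/b2 amplify the backbone features.
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
image = pipe("an astronaut riding a horse").images[0]
pipe.disable_freeu()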
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() def __call__( self, diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py index 28b4cacb8319..5398ee2e5331 100644 --- a/examples/research_projects/rdm/pipeline_rdm.py +++ b/examples/research_projects/rdm/pipeline_rdm.py @@ -17,10 +17,10 @@ LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, - logging, ) from diffusers.image_processor import VaeImageProcessor -from diffusers.utils import is_accelerate_available, randn_tensor +from diffusers.utils import logging +from diffusers.utils.torch_utils import randn_tensor logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -81,121 +81,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.retriever = retriever - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. - - When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in - several steps. This is useful to save a large amount of memory and to allow the processing of larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. 
In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - if isinstance(self.unet.config.attention_head_dim, int): - slice_size = self.unet.config.attention_head_dim // 2 - else: - slice_size = self.unet.config.attention_head_dim[0] // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - - def enable_sequential_cpu_offload(self): - r""" - Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, - text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a - `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. - """ - if is_accelerate_available(): - from accelerate import cpu_offload - else: - raise ImportError("Please install accelerate via `pip install accelerate`") - - device = torch.device("cuda") - - for cpu_offloaded_model in [self.unet, self.clip, self.vae]: - if cpu_offloaded_model is not None: - cpu_offload(cpu_offloaded_model, device) - - @property - def _execution_device(self): - r""" - Returns the device on which the pipeline's models will be executed. After calling - `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module - hooks. - """ - if not hasattr(self.unet, "_hf_hook"): - return self.device - for module in self.unet.modules(): - if ( - hasattr(module, "_hf_hook") - and hasattr(module._hf_hook, "execution_device") - and module._hf_hook.execution_device is not None - ): - return torch.device(module._hf_hook.execution_device) - return self.device - def _encode_prompt(self, prompt): # get prompt text embeddings text_inputs = self.tokenizer( diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index c46dadb53e6a..cbdab46cb8e4 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -42,7 +42,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -87,7 +87,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: class AnimateDiffPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin ): r""" Pipeline for text-to-video generation. @@ -411,66 +411,6 @@ def decode_latents(self, latents): video = video.float() return video - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
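What VAE slicing buys is easiest to see with a stand-in decoder; a toy sketch (the decode function is hypothetical, standing in for vae.decode):

import torch

def decode(latents):
    # Stand-in for the VAE decoder: upsamples 8x spatially, like SD's vae.decode.
    return latents.repeat_interleave(8, dim=-1).repeat_interleave(8, dim=-2)

def sliced_decode(latents):
    # One sample at a time: peak activation memory drops by ~batch_size x.
    return torch.cat([decode(z.unsqueeze(0)) for z in latents], dim=0)

latents = torch.randn(4, 4, 64, 64)
assert torch.equal(decode(latents), sliced_decode(latents))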
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py index 438f6736b6a7..96cd554c112a 100644 --- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py +++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py @@ -24,7 +24,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import logging, replace_example_docstring from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -49,7 +49,7 @@ """ -class AudioLDMPipeline(DiffusionPipeline): +class AudioLDMPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-audio generation using AudioLDM. @@ -96,22 +96,6 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - def _encode_prompt( self, prompt, diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index dc6df780005e..1622657ed161 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -173,22 +173,6 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - def enable_model_cpu_offload(self, gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index b186ec5cab2f..31b9107c7d2e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -137,7 +137,12 @@ def retrieve_timesteps( class StableDiffusionControlNetPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance. @@ -233,39 +238,6 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -824,34 +796,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 10fc4384de29..a3cc0cf3108a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -130,7 +130,12 @@ def prepare_image(image): class StableDiffusionControlNetImg2ImgPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for image-to-image generation using Stable Diffusion with ControlNet guidance. 
@@ -226,39 +231,6 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -866,34 +838,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 35a4ae67c9be..fcce6f88a3e6 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -241,7 +241,12 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False class StableDiffusionControlNetInpaintPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for image inpainting using Stable Diffusion with ControlNet guidance. @@ -351,39 +356,6 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -1076,34 +1048,6 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator): return image_latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index f6308f0c324d..212fba1089ca 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -42,7 +42,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from .multicontrolnet import MultiControlNetModel @@ -140,7 +140,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetInpaintPipeline( - DiffusionPipeline, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. @@ -229,39 +229,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -1021,34 +988,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 7c5a6e39abd4..02c5e3092696 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -55,7 +55,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -116,6 +116,7 @@ class StableDiffusionXLControlNetPipeline( DiffusionPipeline, + EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, @@ -222,39 +223,6 @@ def __init__( self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -873,34 +841,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. 
- - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 273297514a16..545285f4d5a9 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -54,7 +54,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -157,7 +157,7 @@ def retrieve_latents( class StableDiffusionXLControlNetImg2ImgPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for image-to-image generation using Stable Diffusion XL with ControlNet guidance. @@ -271,39 +271,6 @@ def __init__( self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -1030,34 +997,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py index 1d377dd97855..47c9cfdf19f7 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py @@ -23,7 +23,6 @@ from ....image_processor import PipelineImageInput, VaeImageProcessor from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel -from ....models.attention_processor import FusedAttnProcessor2_0 from ....models.lora import adjust_lora_scale_text_encoder from ....schedulers import KarrasDiffusionSchedulers from ....utils import ( @@ -35,7 +34,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline +from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -120,7 +119,12 @@ def retrieve_timesteps( class AltDiffusionPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using Alt Diffusion. @@ -252,35 +256,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - def _encode_prompt( self, prompt, @@ -629,91 +604,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Alt Diffusion v1, v2, and Alt Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py index a9f058bb240b..14f65f0034a2 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -25,7 +25,6 @@ from ....image_processor import PipelineImageInput, VaeImageProcessor from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel -from ....models.attention_processor import FusedAttnProcessor2_0 from ....models.lora import adjust_lora_scale_text_encoder from ....schedulers import KarrasDiffusionSchedulers from ....utils import ( @@ -38,7 +37,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline +from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -160,7 +159,12 @@ def retrieve_timesteps( class AltDiffusionImg2ImgPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-guided image-to-image generation using Alt Diffusion. @@ -689,91 +693,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Alt Diffusion v1, v2, and Alt Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. 
For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index e61c35f9c504..4d3415ca8139 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -26,7 +26,7 @@ from ....schedulers.scheduling_utils import SchedulerMixin from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline +from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -36,7 +36,9 @@ AUGS_CONST = ["A photo of ", "An image of ", "A picture of "] -class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionModelEditingPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): r""" Pipeline for text-to-image model editing. @@ -153,22 +155,6 @@ def append_ca(net_): self.projection_matrices = self.projection_matrices + [l.to_k for l in self.ca_clip_layers] self.og_matrices = self.og_matrices + [copy.deepcopy(l.to_k) for l in self.ca_clip_layers] - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. 
When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py index a37f2870cb02..88b9498c9ab0 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py @@ -32,7 +32,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline +from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -63,7 +63,7 @@ class StableDiffusionParadigmsPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a parallelized version of Stable Diffusion. @@ -146,39 +146,6 @@ def __init__( # attribute to wrap the unet with torch.nn.DataParallel when running multiple denoising steps on multiple GPUs self.wrapped_unet = self.unet - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index acaeab1c6f50..744ca3d19675 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -129,7 +129,12 @@ def retrieve_timesteps( class LatentConsistencyModelImg2ImgPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for image-to-image generation using a latent consistency model. @@ -209,67 +214,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. 
- - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt def encode_prompt( self, diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 469305f248e7..395e4575942d 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -107,7 +107,12 @@ def retrieve_timesteps( class LatentConsistencyModelPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using a latent consistency model. @@ -193,67 +198,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt def encode_prompt( self, diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py index 69bd0521d558..7c9617a3e572 100644 --- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py +++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py @@ -36,7 +36,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin if is_librosa_available(): @@ -64,7 +64,7 @@ """ -class MusicLDMPipeline(DiffusionPipeline): +class MusicLDMPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-audio generation using MusicLDM. @@ -113,22 +113,6 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. 
When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - def _encode_prompt( self, prompt, diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py index 8effa94849c9..85c533acaddf 100644 --- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py @@ -25,7 +25,7 @@ from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .image_encoder import PaintByExampleImageEncoder @@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class PaintByExamplePipeline(DiffusionPipeline): +class PaintByExamplePipeline(DiffusionPipeline, EfficiencyMixin): r""" diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index c2947e3f8dfe..36fe8d779d27 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -2097,10 +2097,12 @@ def set_attention_slice(self, slice_size: Optional[int]): for module in modules: module.set_attention_slice(slice_size) + class EfficiencyMixin: r""" Helper for DiffusionPipeline with vae and unet.(mainly for stable diffusion) """ + def enable_vae_slicing(self): r""" Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to @@ -2213,4 +2215,4 @@ def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") else: self.vae.unfuse_qkv_projections() - self.fusing_vae = False \ No newline at end of file + self.fusing_vae = False diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py index a1cb3f5af378..6513adf5f67d 100644 --- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py @@ -11,14 +11,14 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import SemanticStableDiffusionPipelineOutput logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SemanticStableDiffusionPipeline(DiffusionPipeline): +class SemanticStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with latent editing. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 5249e897ce85..c421f9d9d3f3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -115,7 +115,7 @@ def retrieve_timesteps( class StableDiffusionPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index fa797a7d9f3a..687dab69455d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -34,7 +34,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionImageVariationPipeline(DiffusionPipeline): +class StableDiffusionImageVariationPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline to generate image variations from an input image using Stable Diffusion. 
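With `EfficiencyMixin` in the base list, every pipeline touched by this patch inherits the VAE memory helpers instead of carrying its own copy. A minimal usage sketch of the inherited methods (the checkpoint id and prompt are illustrative placeholders, not part of this patch):

    # Hedged sketch: exercises the mixin methods now inherited by
    # StableDiffusionPipeline; checkpoint id and prompt are examples.
    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    pipe.enable_vae_slicing()  # decode the batch one slice at a time
    pipe.enable_vae_tiling()   # tile decoding/encoding for large images
    image = pipe("a photo of an astronaut riding a horse").images[0]
    pipe.disable_vae_tiling()
    pipe.disable_vae_slicing()
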
@@ -240,34 +240,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() def __call__( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 2746c6ad43ea..392de8e3c036 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -25,7 +25,6 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel -from ...models.attention_processor import FusedAttnProcessor2_0 from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers from ...utils import ( @@ -38,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -156,7 +155,12 @@ def retrieve_timesteps( class StableDiffusionImg2ImgPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-guided image-to-image generation using Stable Diffusion. 
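The FreeU helpers keep the exact signature of the per-pipeline copies they replace, so existing call sites do not change. A sketch reusing the `pipe` object from the earlier example; the scaling factors below are the values commonly recommended for SD v1.x in the FreeU repository, quoted here as an assumption rather than taken from this patch:

    # Hedged sketch: s1/s2 attenuate skip features, b1/b2 amplify
    # backbone features. These values are commonly cited for SD v1.x
    # and may need retuning for other checkpoints.
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
    image = pipe("a fantasy landscape, highly detailed").images[0]
    pipe.disable_freeu()  # restore the unmodified UNet forward pass
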
@@ -768,95 +772,6 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. 
Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index a8031b0a91c2..73b19d358917 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -25,12 +25,11 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...models import AsymmetricAutoencoderKL, AutoencoderKL, ImageProjection, UNet2DConditionModel -from ...models.attention_processor import FusedAttnProcessor2_0 from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -220,7 +219,12 @@ def retrieve_timesteps( class StableDiffusionInpaintPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-guided image inpainting using Stable Diffusion. @@ -910,95 +914,6 @@ def get_timesteps(self, num_inference_steps, strength, device): return timesteps, num_inference_steps - t_start - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index f4bb8267aac7..6a522cafd19f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import PIL_INTERPOLATION, deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -73,7 +73,7 @@ def retrieve_latents( class StableDiffusionInstructPix2PixPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion). @@ -807,34 +807,6 @@ def prepare_image_latents( return image_latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py index 8d272fa5748c..2712b17901a3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py @@ -27,7 +27,7 @@ from ...schedulers import EulerDiscreteScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -60,7 +60,7 @@ def preprocess(image): return image -class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixin): +class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin): r""" Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2. 
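The experimental QKV-fusion pair also moves to the mixin unchanged: fusion still requires the VAE to be an `AutoencoderKL`, and unfusing a component that was never fused only logs a warning. A sketch under those assumptions, again continuing from the `pipe` object above:

    # Hedged sketch: fuse_qkv_projections raises for non-AutoencoderKL
    # VAEs; unfuse_qkv_projections warns and does nothing if the
    # corresponding component was never fused.
    pipe.fuse_qkv_projections(unet=True, vae=True)
    image = pipe("an oil painting of a lighthouse at dusk").images[0]
    pipe.unfuse_qkv_projections(unet=True, vae=True)
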
@@ -258,34 +258,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() def __call__( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index f2b77a6d17b9..be26d67322bc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -34,7 +34,7 @@ from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import StableDiffusionPipelineOutput @@ -68,7 +68,7 @@ def preprocess(image): class StableDiffusionUpscalePipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-guided image super-resolution using Stable Diffusion 2. @@ -530,34 +530,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. 
- - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() def __call__( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 8b66fa0f1972..23d9e382ac01 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -58,7 +58,7 @@ """ -class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableUnCLIPPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): """ Pipeline for text-to-image generation using stable unCLIP. @@ -155,22 +155,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder def _encode_prior_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index feb482fb429c..0a3a3c56c6f4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -69,7 +69,7 @@ """ -class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): """ Pipeline for text-guided image-to-image generation using stable unCLIP. @@ -156,22 +156,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index a6e593282996..b267d88e67e0 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a return hidden_states -class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversionLoaderMixin): +class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin): r""" Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite. @@ -246,22 +246,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py index a6724e44334f..ee6b5a0ac739 100644 --- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py @@ -39,7 +39,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -235,7 +235,9 @@ def preprocess_mask(mask, batch_size: int = 1): return mask -class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionDiffEditPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): r""" @@ -371,39 +373,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py index 138e002bf0eb..76c2c23a3f2b 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -99,7 +99,7 @@ """ -class StableDiffusionGLIGENPipeline(DiffusionPipeline): +class StableDiffusionGLIGENPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). @@ -172,35 +172,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py index 6bd67a06cbbd..404f681c3a32 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py @@ -34,7 +34,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.clip_image_project_model import CLIPImageProjection from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -145,7 +145,7 @@ """ -class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline): +class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). @@ -230,35 +230,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt def encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 602deeef194f..6171a22ddd70 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -26,7 +26,7 @@ from ...schedulers import LMSDiscreteScheduler from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput @@ -47,7 +47,7 @@ def apply_model(self, *args, **kwargs): return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample -class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionKDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py index 8b83c9aec43a..2742b8797c8f 100644 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py @@ -50,7 +50,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -91,6 +91,7 @@ def apply_model(self, *args, **kwargs): class StableDiffusionXLKDiffusionPipeline( DiffusionPipeline, + EfficiencyMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, @@ -196,39 +197,6 @@ def set_scheduler(self, scheduler_type: str): raise ValueError(f"Invalid scheduler type {scheduler_type}. Please choose one of {valid_samplers}.") - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. 
When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -582,94 +550,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. 
- - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - @property def guidance_scale(self): return self._guidance_scale diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index 6553e9786488..8f517e3d035c 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -82,7 +82,12 @@ class LDM3DPipelineOutput(BaseOutput): class StableDiffusionLDM3DPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image and 3D generation using LDM3D. @@ -165,39 +170,6 @@ def __init__( self.image_processor = VaeImageProcessorLDM3D(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 51e6f47b83b6..29cde4ef328b 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -32,7 +32,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -59,7 +59,9 @@ """ -class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin): +class StableDiffusionPanoramaPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin +): r""" Pipeline for text-to-image generation using MultiDiffusion. @@ -140,22 +142,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py index d72698cdc6a3..936cd5964666 100644 --- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py @@ -14,7 +14,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import StableDiffusionSafePipelineOutput from .safety_checker import SafeStableDiffusionSafetyChecker @@ -22,7 +22,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionPipelineSafe(DiffusionPipeline, IPAdapterMixin): +class StableDiffusionPipelineSafe(DiffusionPipeline, EfficiencyMixin, IPAdapterMixin): r""" Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion. 
diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py index 435bbca4d7d2..95a4215e6710 100644 --- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -98,7 +98,7 @@ def __call__( # Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input -class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin): +class StableDiffusionSAGPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -161,22 +161,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index e90fe6571f63..508c09a42fbb 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -52,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -148,6 +148,7 @@ def retrieve_timesteps( class StableDiffusionXLPipeline( DiffusionPipeline, + EfficiencyMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, @@ -257,39 +258,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def encode_prompt( self, prompt: str, @@ -744,93 +712,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. 
- """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index a6ed0768eb1b..8c71019b7647 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -35,7 +35,6 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models.attention_processor import ( AttnProcessor2_0, - FusedAttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, @@ -53,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -166,6 +165,7 @@ def retrieve_timesteps( class StableDiffusionXLImg2ImgPipeline( DiffusionPipeline, + EfficiencyMixin, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, @@ -278,39 +278,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. 
When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -879,95 +846,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. 
- """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index f44d53fffaba..f45ebd273f52 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -36,7 +36,6 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models.attention_processor import ( AttnProcessor2_0, - FusedAttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, @@ -54,7 +53,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -311,6 +310,7 @@ def retrieve_timesteps( class StableDiffusionXLInpaintPipeline( DiffusionPipeline, + EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, @@ -429,39 +429,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None): dtype = next(self.image_encoder.parameters()).dtype @@ -1115,95 +1082,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. 
- """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py index 2e4225cf6145..280e75035f1e 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py @@ -41,7 +41,7 @@ scale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -118,7 +118,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLInstructPix2PixPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + FromSingleFileMixin, + StableDiffusionXLLoraLoaderMixin, ): r""" Pipeline for pixel-level image editing by following text instructions. Based on Stable Diffusion XL. @@ -205,38 +209,6 @@ def __init__( else: self.watermark = None - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. - - When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several - steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. - - When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in - several steps. 
This is useful to save a large amount of memory and to allow the processing of larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def encode_prompt( self, prompt: str, @@ -621,34 +593,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index f5d3b66f326b..271082df4c4e 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -163,7 +163,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class StableDiffusionAdapterPipeline(DiffusionPipeline): +class StableDiffusionAdapterPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter https://arxiv.org/abs/2302.08453 @@ -248,22 +248,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. 
When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -628,34 +612,6 @@ def _default_height_width(self, height, width, image): return height, width - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 0c812179dac1..1b15f1ec3107 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -51,7 +51,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -181,6 +181,7 @@ def retrieve_timesteps( class StableDiffusionXLAdapterPipeline( DiffusionPipeline, + EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, @@ -270,39 +271,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.default_sample_size = self.unet.config.sample_size - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt def encode_prompt( self, @@ -788,34 +756,6 @@ def _default_height_width(self, height, width, image): return height, width - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index eb34910b7008..db0c8e54fb6f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import TextToVideoSDPipelineOutput @@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: return outputs -class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-to-video generation. @@ -129,39 +129,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. 
This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -484,34 +451,6 @@ def prepare_latents( latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 2a41d9a8f735..dcc7c6a7df0f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from . import TextToVideoSDPipelineOutput @@ -157,7 +157,7 @@ def preprocess_video(video): return video -class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class VideoToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided video-to-video generation. @@ -205,39 +205,6 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, @@ -591,34 +558,6 @@ def prepare_latents(self, video, timestep, batch_size, dtype, device, generator= return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py index fc34d50a50dd..991dfeee0a55 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py @@ -17,7 +17,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from ..stable_diffusion import StableDiffusionSafetyChecker @@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s return warped_latents -class TextToVideoZeroPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoZeroPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for zero-shot text-to-video generation using Stable Diffusion. 
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py index 4fe2279a468a..afd81d04f3fe 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin if is_invisible_watermark_available(): @@ -327,6 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class TextToVideoZeroSDXLPipeline( DiffusionPipeline, + EfficiencyMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, ): @@ -436,22 +437,6 @@ def prepare_extra_step_kwargs(self, generator, eta): extra_step_kwargs["generator"] = generator return extra_step_kwargs - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.upcast_vae def upcast_vae(self): dtype = self.vae.dtype diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index 38c12edb2d43..cad7cb381e64 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -21,7 +21,7 @@ from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.outputs import BaseOutput from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .modeling_text_decoder import UniDiffuserTextDecoder from .modeling_uvit import UniDiffuserModel @@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput): text: Optional[Union[List[str], List[List[str]]]] -class UniDiffuserPipeline(DiffusionPipeline): +class UniDiffuserPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned image generation, image-conditioned text generation, and joint image-text generation. @@ -135,39 +135,6 @@ def __init__( # TODO: handle safety checking? self.safety_checker = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature From 4a7fc38b7d600a1b1abb73624e770fdf780d73a5 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Tue, 16 Jan 2024 00:46:49 +0800 Subject: [PATCH 03/17] add mixin to rdm & restore audioldm2 & fix quality checks --- examples/research_projects/rdm/pipeline_rdm.py | 3 ++- .../pipelines/audioldm2/pipeline_audioldm2.py | 16 ++++++++++++++++ .../pipeline_stable_diffusion.py | 7 ++++++- .../pipeline_stable_diffusion_k_diffusion.py | 4 +++- src/diffusers/utils/dummy_pt_objects.py | 15 +++++++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py index 5398ee2e5331..dbc98d028fad 100644 --- a/examples/research_projects/rdm/pipeline_rdm.py +++ b/examples/research_projects/rdm/pipeline_rdm.py @@ -19,6 +19,7 @@ UNet2DConditionModel, ) from diffusers.image_processor import VaeImageProcessor +from diffusers.pipelines.pipeline_utils import EfficiencyMixin from diffusers.utils import logging from diffusers.utils.torch_utils import randn_tensor @@ -26,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class RDMPipeline(DiffusionPipeline): +class RDMPipeline(DiffusionPipeline, EfficiencyMixin): r""" Pipeline for text-to-image generation using Retrieval Augmented Diffusion. diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index 1622657ed161..64f93e3eefea 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -173,6 +173,22 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) + # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.enable_vae_slicing + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
+ """ + self.vae.enable_slicing() + + # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.disable_vae_slicing + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_slicing() + def enable_model_cpu_offload(self, gpu_id=0): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index c421f9d9d3f3..860dfeeb85af 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -115,7 +115,12 @@ def retrieve_timesteps( class StableDiffusionPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 6171a22ddd70..ea1beb6788c3 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -47,7 +47,9 @@ def apply_model(self, *args, **kwargs): return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample -class StableDiffusionKDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionKDiffusionPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): r""" Pipeline for text-to-image generation using Stable Diffusion. 
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index a4f5436038ea..19f5ac445a9e 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -570,6 +570,21 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) +class EfficiencyMixin(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class ImagePipelineOutput(metaclass=DummyObject): _backends = ["torch"] From cc4f805b07e08b5e4a743a64c9e64ce7ed1aaf75 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Mon, 19 Feb 2024 21:42:01 +0800 Subject: [PATCH 04/17] rebase on main branch --- examples/community/ip_adapter_face_id.py | 126 +----------------- .../pipeline_animatediff_img2video.py | 67 +--------- .../pipeline_stable_diffusion_xl_ipex.py | 61 --------- .../models/unets/unet_3d_condition.py | 39 ++++++ src/diffusers/models/unets/unet_i2vgen_xl.py | 38 ++++++ .../models/unets/unet_motion_model.py | 39 ++++++ .../animatediff/pipeline_animatediff.py | 2 - .../pipeline_animatediff_video2video.py | 65 +-------- .../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 68 +--------- src/diffusers/pipelines/pia/pipeline_pia.py | 71 ++-------- .../pipeline_stable_diffusion_img2img.py | 33 ----- .../pipeline_stable_diffusion_inpaint.py | 33 ----- 12 files changed, 142 insertions(+), 500 deletions(-) diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py index dfd6a9df6eb1..f92b91d803c5 100644 --- a/examples/community/ip_adapter_face_id.py +++ b/examples/community/ip_adapter_face_id.py @@ -26,9 +26,8 @@ from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.models.attention_processor import FusedAttnProcessor2_0 from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -415,7 +414,12 @@ def retrieve_timesteps( class IPAdapterFaceIDStableDiffusionPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + LoraLoaderMixin, + IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -727,35 +731,6 @@ def set_ip_adapter_scale(self, scale): if isinstance(attn_processor, (LoRAIPAdapterAttnProcessor, LoRAIPAdapterAttnProcessor2_0)): attn_processor.scale = scale - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def _encode_prompt( self, prompt, @@ -1080,93 +1055,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype latents = latents * self.scheduler.init_noise_sigma return latents - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """ - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. 
- """ - self.fusing_unet = False - self.fusing_vae = False - - if unet: - self.fusing_unet = True - self.unet.fuse_qkv_projections() - self.unet.set_attn_processor(FusedAttnProcessor2_0()) - - if vae: - if not isinstance(self.vae, AutoencoderKL): - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") - - self.fusing_vae = True - self.vae.fuse_qkv_projections() - self.vae.set_attn_processor(FusedAttnProcessor2_0()) - - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): - """Disable QKV projection fusion if enabled. - - - - This API is 🧪 experimental. - - - - Args: - unet (`bool`, defaults to `True`): To apply fusion on the UNet. - vae (`bool`, defaults to `True`): To apply fusion on the VAE. - - """ - if unet: - if not self.fusing_unet: - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") - else: - self.unet.unfuse_qkv_projections() - self.fusing_unet = False - - if vae: - if not self.fusing_vae: - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") - else: - self.vae.unfuse_qkv_projections() - self.fusing_vae = False - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index 826742f9afc8..d666e554f07d 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -26,7 +26,7 @@ from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unet_motion_model import MotionAdapter -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -230,7 +230,9 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput): frames: Union[torch.Tensor, np.ndarray] -class AnimateDiffImgToVideoPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin): +class AnimateDiffImgToVideoPipeline( + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin +): r""" Pipeline for text-to-video generation. @@ -527,67 +529,6 @@ def decode_latents(self, latents): video = video.float() return video - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature diff --git a/examples/community/pipeline_stable_diffusion_xl_ipex.py b/examples/community/pipeline_stable_diffusion_xl_ipex.py index c57d58bb58ba..68ad5dbec77d 100644 --- a/examples/community/pipeline_stable_diffusion_xl_ipex.py +++ b/examples/community/pipeline_stable_diffusion_xl_ipex.py @@ -267,39 +267,6 @@ def __init__( else: self.watermark = None - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. 
- """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - def encode_prompt( self, prompt: str, @@ -701,34 +668,6 @@ def upcast_vae(self): self.vae.decoder.conv_in.to(dtype) self.vae.decoder.mid_block.to(dtype) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. 
- """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ diff --git a/src/diffusers/models/unets/unet_3d_condition.py b/src/diffusers/models/unets/unet_3d_condition.py index 1d5bd57cf8e0..b7641a96a7a1 100644 --- a/src/diffusers/models/unets/unet_3d_condition.py +++ b/src/diffusers/models/unets/unet_3d_condition.py @@ -27,6 +27,7 @@ from ..attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, CROSS_ATTENTION_PROCESSORS, + Attention, AttentionProcessor, AttnAddedKVProcessor, AttnProcessor, @@ -503,6 +504,44 @@ def disable_freeu(self): if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None: setattr(upsample_block, k, None) + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, + key, value) are fused. For cross-attention modules, key and value projection matrices are fused. + + + + This API is 🧪 experimental. + + + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + + + This API is 🧪 experimental. + + + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unload_lora def unload_lora(self): """Unloads LoRA weights.""" diff --git a/src/diffusers/models/unets/unet_i2vgen_xl.py b/src/diffusers/models/unets/unet_i2vgen_xl.py index 5dce87254986..a096f842ab6c 100644 --- a/src/diffusers/models/unets/unet_i2vgen_xl.py +++ b/src/diffusers/models/unets/unet_i2vgen_xl.py @@ -474,6 +474,44 @@ def disable_freeu(self): if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None: setattr(upsample_block, k, None) + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, + key, value) are fused. For cross-attention modules, key and value projection matrices are fused. + + + + This API is 🧪 experimental. 
+ + + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + + + This API is 🧪 experimental. + + + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + def forward( self, sample: torch.FloatTensor, diff --git a/src/diffusers/models/unets/unet_motion_model.py b/src/diffusers/models/unets/unet_motion_model.py index 9cb0f42c85ef..ebdddf09bd63 100644 --- a/src/diffusers/models/unets/unet_motion_model.py +++ b/src/diffusers/models/unets/unet_motion_model.py @@ -23,6 +23,7 @@ from ..attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, CROSS_ATTENTION_PROCESSORS, + Attention, AttentionProcessor, AttnAddedKVProcessor, AttnProcessor, @@ -700,6 +701,44 @@ def disable_freeu(self) -> None: if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None: setattr(upsample_block, k, None) + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, + key, value) are fused. For cross-attention modules, key and value projection matrices are fused. + + + + This API is 🧪 experimental. + + + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + + + This API is 🧪 experimental. + + + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + def forward( self, sample: torch.FloatTensor, diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index cbdab46cb8e4..050475a1ad26 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -87,9 +87,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: class AnimateDiffPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin - ): r""" Pipeline for text-to-video generation. 
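The fuse_qkv_projections/unfuse_qkv_projections hooks copied onto UNet3DConditionModel, I2VGenXLUNet, and UNetMotionModel above let EfficiencyMixin's fused-attention API cover the video pipelines as well. A hedged usage sketch; the checkpoint ids below are common AnimateDiff examples and are assumptions, not part of this patch:

import torch
from diffusers import AnimateDiffPipeline, MotionAdapter

adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
pipe = AnimateDiffPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    motion_adapter=adapter,
    torch_dtype=torch.float16,
).to("cuda")

pipe.fuse_qkv_projections(unet=True, vae=True)  # fuse q/k/v (self-attn) and k/v (cross-attn)
frames = pipe("a panda playing a guitar", num_frames=16).frames[0]
pipe.unfuse_qkv_projections()  # restore the original attention processors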
diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index f5ada63dfdfc..599f0497da97 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -35,7 +35,7 @@ from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -165,7 +165,7 @@ def retrieve_timesteps( class AnimateDiffVideoToVideoPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin + DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin ): r""" Pipeline for video-to-video generation. @@ -454,67 +454,6 @@ def decode_latents(self, latents): video = video.float() return video - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. 
This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 5354f6643cb7..ac1f44a2f348 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -31,7 +31,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -103,7 +103,10 @@ class I2VGenXLPipelineOutput(BaseOutput): frames: Union[torch.Tensor, np.ndarray, List[List[PIL.Image.Image]]] -class I2VGenXLPipeline(DiffusionPipeline): +class I2VGenXLPipeline( + DiffusionPipeline, + EfficiencyMixin, +): r""" Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/). @@ -161,39 +164,6 @@ def guidance_scale(self): def do_classifier_free_guidance(self): return self._guidance_scale > 1 - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - def encode_prompt( self, prompt, @@ -542,34 +512,6 @@ def prepare_latents( latents = latents * self.scheduler.init_noise_sigma return latents - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 1a385ea462c6..90468b0a2127 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -46,7 +46,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline +from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -211,7 +211,13 @@ class PIAPipelineOutput(BaseOutput): class PIAPipeline( - DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FromSingleFileMixin, FreeInitMixin + DiffusionPipeline, + EfficiencyMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FromSingleFileMixin, + FreeInitMixin, ): r""" Pipeline for text-to-video generation. @@ -500,67 +506,6 @@ def decode_latents(self, latents): video = video.float() return video - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. - - The suffixes after the scaling factors represent the stages where they are being applied. - - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. - - Args: - s1 (`float`): - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - s2 (`float`): - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to - mitigate "oversmoothing effect" in the enhanced denoising process. - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. - """ - if not hasattr(self, "unet"): - raise ValueError("The pipeline must have `unet` for using FreeU.") - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu - def disable_freeu(self): - """Disables the FreeU mechanism if enabled.""" - self.unet.disable_freeu() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs def prepare_extra_step_kwargs(self, generator, eta): # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 392de8e3c036..6df7d62d7c9b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -292,39 +292,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. 
This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 73b19d358917..6652aecdcb5f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -364,39 +364,6 @@ def __init__( ) self.register_to_config(requires_safety_checker=requires_safety_checker) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt( self, From fc71e97477e85c53b0f8f8cccf152561826d78b5 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Mon, 19 Feb 2024 22:23:18 +0800 Subject: [PATCH 05/17] init PipelineEfficiencyFunctionTesterMixin --- .../test_stable_diffusion.py | 13 +- .../test_stable_diffusion_xl.py | 44 ++---- tests/pipelines/test_pipelines_common.py | 129 ++++++++++++++++++ 3 files changed, 151 insertions(+), 35 deletions(-) diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py index dcc4dadf992b..a11ca7b4a233 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -52,14 +52,23 @@ TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS, ) -from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin +from ..test_pipelines_common import ( + PipelineEfficiencyFunctionTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, +) enable_full_determinism() class StableDiffusion2PipelineFastTests( - PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase + PipelineEfficiencyFunctionTesterMixin, + PipelineLatentTesterMixin, + PipelineKarrasSchedulerTesterMixin, + PipelineTesterMixin, + unittest.TestCase, ): pipeline_class = StableDiffusionPipeline params = TEXT_TO_IMAGE_PARAMS diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py index 16ef7e3009bd..b9e01f598cde 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py @@ -49,14 +49,23 @@ TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS, ) -from ..test_pipelines_common import PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin +from ..test_pipelines_common import ( + PipelineEfficiencyFunctionTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + SDXLOptionalComponentsTesterMixin, +) enable_full_determinism() class StableDiffusionXLPipelineFastTests( - PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase + PipelineEfficiencyFunctionTesterMixin, + PipelineLatentTesterMixin, + PipelineTesterMixin, + SDXLOptionalComponentsTesterMixin, + unittest.TestCase, ): pipeline_class = StableDiffusionXLPipeline params = TEXT_TO_IMAGE_PARAMS @@ -939,37 +948,6 @@ def test_stable_diffusion_xl_save_from_pretrained(self): assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 - def test_stable_diffusion_xl_with_fused_qkv_projections(self): - device = "cpu" # ensure determinism for the device-dependent torch.Generator - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLPipeline(**components) - sd_pipe = sd_pipe.to(device) - sd_pipe.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(device) - image = sd_pipe(**inputs).images - original_image_slice = image[0, -3:, -3:, -1] - - sd_pipe.fuse_qkv_projections() - inputs = self.get_dummy_inputs(device) - image = sd_pipe(**inputs).images - image_slice_fused = image[0, -3:, -3:, -1] - - sd_pipe.unfuse_qkv_projections() - inputs = 
-        image = sd_pipe(**inputs).images
-        image_slice_disabled = image[0, -3:, -3:, -1]
-
-        assert np.allclose(
-            original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
-        ), "Fusion of QKV projections shouldn't affect the outputs."
-        assert np.allclose(
-            image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2
-        ), "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
-        assert np.allclose(
-            original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2
-        ), "Original outputs should match when fused QKV projections are disabled."
-
     def test_pipeline_interrupt(self):
         components = self.get_dummy_components()
         sd_pipe = StableDiffusionXLPipeline(**components)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 7f51847caf07..58929296e34d 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -59,6 +59,135 @@ def check_same_shape(tensor_list):
     return all(shape == shapes[0] for shape in shapes[1:])

+class PipelineEfficiencyFunctionTesterMixin:
+    """
+    This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
+    It provides a set of common tests for PyTorch pipelines that inherit from EfficiencyMixin
+    (e.g. vae_slicing, vae_tiling, freeu).
+    """
+
+    def test_vae_slicing(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        image_count = 4
+
+        inputs = self.get_dummy_inputs(device)
+        inputs["prompt"] = [inputs["prompt"]] * image_count
+        output_1 = pipe(**inputs)
+
+        # make sure sliced vae decode yields the same result
+        pipe.enable_vae_slicing()
+        inputs = self.get_dummy_inputs(device)
+        inputs["prompt"] = [inputs["prompt"]] * image_count
+        output_2 = pipe(**inputs)
+
+        # there is a small discrepancy at image borders vs. full batch decode
+        assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 3e-3
+
+    def test_vae_tiling(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+
+        # the safety checker is not relevant for this test
+        components["safety_checker"] = None
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(device)
+        pipe.set_progress_bar_config(disable=None)
+
+        prompt = "A painting of a squirrel eating a burger"
+
+        # Test that tiled decode yields the same result as the regular (non-tiled) decode
+        generator = torch.Generator(device=device).manual_seed(0)
+        output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+
+        # make sure tiled vae decode yields the same result
+        pipe.enable_vae_tiling()
+        generator = torch.Generator(device=device).manual_seed(0)
+        output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
+
+        assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 5e-1
+
+        # test that tiled decode works with various shapes
+        shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)]
+        for shape in shapes:
+            zeros = torch.zeros(shape).to(device)
+            pipe.vae.decode(zeros)
+
+    def test_freeu_enabled(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        prompt = "hey"
+        output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+        pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
+        output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+        assert not np.allclose(
+            output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1]
+        ), "Enabling of FreeU should lead to different results."
+
+    def test_freeu_disabled(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+
+        prompt = "hey"
+        output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+        pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
+        pipe.disable_freeu()
+
+        freeu_keys = {"s1", "s2", "b1", "b2"}
+        for upsample_block in pipe.unet.up_blocks:
+            for key in freeu_keys:
+                assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None."
+
+        output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+
+        assert np.allclose(
+            output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
+        ), "Disabling of FreeU should lead to results similar to the default pipeline results."
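+
+    # NOTE (illustrative sketch, not part of the patched tests): the helpers these
+    # tests exercise form the end-user API that EfficiencyMixin adds to a pipeline:
+    #   pipe.enable_vae_slicing()  /  pipe.disable_vae_slicing()
+    #   pipe.enable_vae_tiling()   /  pipe.disable_vae_tiling()
+    #   pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  /  pipe.disable_freeu()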
+ + def test_fused_qkv_projections(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + original_image_slice = image[0, -3:, -3:, -1] + + pipe.fuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_fused = image[0, -3:, -3:, -1] + + pipe.unfuse_qkv_projections() + inputs = self.get_dummy_inputs(device) + image = pipe(**inputs).images + image_slice_disabled = image[0, -3:, -3:, -1] + + assert np.allclose( + original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2 + ), "Fusion of QKV projections shouldn't affect the outputs." + assert np.allclose( + image_slice_fused, image_slice_disabled, atol=1e-2, rtol=1e-2 + ), "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled." + assert np.allclose( + original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2 + ), "Original outputs should match when fused QKV projections are disabled." + + class PipelineLatentTesterMixin: """ This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes. From 95f53e685eb4930e281a338dffcae7ab3db1f262 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Wed, 21 Feb 2024 10:46:09 +0800 Subject: [PATCH 06/17] rename EfficiencyMixin to LatentDiffusionMixin --- examples/community/composable_stable_diffusion.py | 4 ++-- examples/community/gluegen.py | 4 ++-- examples/community/ip_adapter_face_id.py | 4 ++-- examples/community/latent_consistency_interpolate.py | 4 ++-- examples/community/llm_grounded_diffusion.py | 4 ++-- examples/community/lpw_stable_diffusion.py | 4 ++-- examples/community/lpw_stable_diffusion_xl.py | 4 ++-- examples/community/pipeline_animatediff_controlnet.py | 4 ++-- examples/community/pipeline_animatediff_img2video.py | 4 ++-- examples/community/pipeline_demofusion_sdxl.py | 4 ++-- examples/community/pipeline_sdxl_style_aligned.py | 6 +++--- .../pipeline_stable_diffusion_xl_controlnet_adapter.py | 4 ++-- ...ine_stable_diffusion_xl_controlnet_adapter_inpaint.py | 4 ++-- examples/community/pipeline_zero1to3.py | 6 +++--- examples/community/sd_text2img_k_diffusion.py | 4 ++-- examples/community/seed_resize_stable_diffusion.py | 4 ++-- examples/community/speech_to_image_diffusion.py | 4 ++-- examples/community/stable_diffusion_comparison.py | 4 ++-- examples/community/stable_diffusion_ipex.py | 6 ++++-- examples/community/stable_diffusion_mega.py | 4 ++-- examples/community/stable_diffusion_repaint.py | 6 ++++-- examples/community/text_inpainting.py | 4 ++-- .../controlnetxs/pipeline_controlnet_xs.py | 4 ++-- .../controlnetxs/pipeline_controlnet_xs_sd_xl.py | 4 ++-- examples/research_projects/rdm/pipeline_rdm.py | 4 ++-- src/diffusers/__init__.py | 4 ++-- src/diffusers/pipelines/__init__.py | 4 ++-- .../pipelines/animatediff/pipeline_animatediff.py | 9 +++++++-- .../animatediff/pipeline_animatediff_video2video.py | 9 +++++++-- src/diffusers/pipelines/audioldm/pipeline_audioldm.py | 4 ++-- src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py | 4 ++-- .../pipelines/controlnet/pipeline_controlnet.py | 4 ++-- .../pipelines/controlnet/pipeline_controlnet_img2img.py | 4 ++-- .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 4 ++-- .../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 4 
++-- .../pipelines/controlnet/pipeline_controlnet_sd_xl.py | 4 ++-- .../controlnet/pipeline_controlnet_sd_xl_img2img.py | 8 ++++++-- .../deprecated/alt_diffusion/pipeline_alt_diffusion.py | 4 ++-- .../alt_diffusion/pipeline_alt_diffusion_img2img.py | 4 ++-- .../pipeline_stable_diffusion_model_editing.py | 4 ++-- .../pipeline_stable_diffusion_paradigms.py | 4 ++-- src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 4 ++-- .../pipeline_latent_consistency_img2img.py | 4 ++-- .../pipeline_latent_consistency_text2img.py | 4 ++-- src/diffusers/pipelines/musicldm/pipeline_musicldm.py | 4 ++-- .../paint_by_example/pipeline_paint_by_example.py | 4 ++-- src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++-- src/diffusers/pipelines/pipeline_utils.py | 4 ++-- .../pipeline_semantic_stable_diffusion.py | 4 ++-- .../stable_diffusion/pipeline_stable_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_image_variation.py | 4 ++-- .../pipeline_stable_diffusion_img2img.py | 4 ++-- .../pipeline_stable_diffusion_inpaint.py | 4 ++-- .../pipeline_stable_diffusion_instruct_pix2pix.py | 4 ++-- .../pipeline_stable_diffusion_latent_upscale.py | 4 ++-- .../pipeline_stable_diffusion_upscale.py | 4 ++-- .../pipelines/stable_diffusion/pipeline_stable_unclip.py | 4 ++-- .../stable_diffusion/pipeline_stable_unclip_img2img.py | 6 ++++-- .../pipeline_stable_diffusion_attend_and_excite.py | 4 ++-- .../pipeline_stable_diffusion_diffedit.py | 4 ++-- .../pipeline_stable_diffusion_gligen.py | 4 ++-- .../pipeline_stable_diffusion_gligen_text_image.py | 4 ++-- .../pipeline_stable_diffusion_k_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_xl_k_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_ldm3d.py | 4 ++-- .../pipeline_stable_diffusion_panorama.py | 4 ++-- .../pipeline_stable_diffusion_safe.py | 4 ++-- .../pipeline_stable_diffusion_sag.py | 4 ++-- .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 4 ++-- .../pipeline_stable_diffusion_xl_img2img.py | 4 ++-- .../pipeline_stable_diffusion_xl_inpaint.py | 4 ++-- .../pipeline_stable_diffusion_xl_instruct_pix2pix.py | 4 ++-- .../t2i_adapter/pipeline_stable_diffusion_adapter.py | 4 ++-- .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 4 ++-- .../pipeline_text_to_video_synth.py | 4 ++-- .../pipeline_text_to_video_synth_img2img.py | 4 ++-- .../pipeline_text_to_video_zero.py | 4 ++-- .../pipeline_text_to_video_zero_sdxl.py | 4 ++-- .../pipelines/unidiffuser/pipeline_unidiffuser.py | 4 ++-- src/diffusers/utils/dummy_pt_objects.py | 2 +- .../stable_diffusion_2/test_stable_diffusion.py | 4 ++-- .../stable_diffusion_xl/test_stable_diffusion_xl.py | 4 ++-- tests/pipelines/test_pipelines_common.py | 4 ++-- 83 files changed, 187 insertions(+), 167 deletions(-) diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index eb099f9398b9..c7b91f94f294 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -22,7 +22,7 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import ( @@ -39,7 +39,7 
@@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class ComposableStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): +class ComposableStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py index 19cbf6cb3b82..f0ace91d683c 100644 --- a/examples/community/gluegen.py +++ b/examples/community/gluegen.py @@ -10,7 +10,7 @@ from diffusers.loaders import LoraLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -194,7 +194,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class GlueGenStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin, LoraLoaderMixin): +class GlueGenStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin, LoraLoaderMixin): def __init__( self, vae: AutoencoderKL, diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py index f92b91d803c5..d1fa98bc9df9 100644 --- a/examples/community/ip_adapter_face_id.py +++ b/examples/community/ip_adapter_face_id.py @@ -27,7 +27,7 @@ from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -415,7 +415,7 @@ def retrieve_timesteps( class IPAdapterFaceIDStableDiffusionPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py index a70fb6a467f5..44a7c8bec07a 100644 --- a/examples/community/latent_consistency_interpolate.py +++ b/examples/community/latent_consistency_interpolate.py @@ -9,7 +9,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import LCMScheduler from diffusers.utils import ( @@ -190,7 +190,7 @@ def slerp( class LatentConsistencyModelWalkPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, 
FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a latent consistency model. diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py index 39d530e09b1c..7f7b0fad39f8 100644 --- a/examples/community/llm_grounded_diffusion.py +++ b/examples/community/llm_grounded_diffusion.py @@ -35,7 +35,7 @@ from diffusers.models.attention_processor import AttnProcessor2_0 from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines import DiffusionPipeline -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -269,7 +269,7 @@ def __call__( class LLMGroundedDiffusionPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index debaef7d4642..819787ee9a02 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -13,7 +13,7 @@ from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -409,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8): class StableDiffusionLongPromptWeightingPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py index 83f0e8b7818d..c5c93d9ea381 100644 --- a/examples/community/lpw_stable_diffusion_xl.py +++ b/examples/community/lpw_stable_diffusion_xl.py @@ -30,7 +30,7 @@ LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -546,7 +546,7 @@ def retrieve_timesteps( class SDXLLongPromptWeightingPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index dc7b6302c5ea..a1d6c5605102 100644 --- a/examples/community/pipeline_animatediff_controlnet.py 
+++ b/examples/community/pipeline_animatediff_controlnet.py @@ -28,7 +28,7 @@ from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unets.unet_motion_model import MotionAdapter from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -112,7 +112,7 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput): class AnimateDiffControlNetPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin ): r""" Pipeline for text-to-video generation. diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index d666e554f07d..d691ece93050 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -26,7 +26,7 @@ from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unet_motion_model import MotionAdapter -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -231,7 +231,7 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput): class AnimateDiffImgToVideoPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin ): r""" Pipeline for text-to-video generation. diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py index 69624271beed..e7dc269a1e71 100644 --- a/examples/community/pipeline_demofusion_sdxl.py +++ b/examples/community/pipeline_demofusion_sdxl.py @@ -23,7 +23,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( is_accelerate_available, @@ -94,7 +94,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class DemoFusionSDXLPipeline( - DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. 
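The hunks above are representative of the whole series: a community pipeline drops its hand-copied helper methods and simply lists the mixin next to DiffusionPipeline. A minimal sketch of the resulting pattern (the class and component names here are illustrative, not taken from this patch):

import torch
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin


class MyLatentPipeline(DiffusionPipeline, LatentDiffusionMixin):
    # register_modules exposes `self.vae` and `self.unet`, which is all the
    # mixin's enable_vae_slicing/enable_vae_tiling/enable_freeu/
    # fuse_qkv_projections helpers rely on.
    def __init__(self, vae: AutoencoderKL, unet: UNet2DConditionModel):
        super().__init__()
        self.register_modules(vae=vae, unet=unet)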
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py index 2b6047e97bfb..b547c35f1123 100644 --- a/examples/community/pipeline_sdxl_style_aligned.py +++ b/examples/community/pipeline_sdxl_style_aligned.py @@ -51,7 +51,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -85,7 +85,7 @@ >>> from typing import List >>> import torch - >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,EfficiencyMixin + >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,LatentDiffusionMixin >>> from PIL import Image >>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo" @@ -389,7 +389,7 @@ def retrieve_latents( class StyleAlignedSDXLPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py index 490fde58b916..49a46e9ba4be 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py @@ -33,7 +33,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -159,7 +159,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetAdapterPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py index 17777090df24..5347ab949697 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py @@ -52,7 +52,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -305,7 +305,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetAdapterInpaintPipeline( - DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin, LoraLoaderMixin + DiffusionPipeline, 
LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py index 6e1c1d015e48..1656ae674382 100644 --- a/examples/community/pipeline_zero1to3.py +++ b/examples/community/pipeline_zero1to3.py @@ -22,10 +22,10 @@ # randn_tensor, # replace_example_docstring, # ) -# from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +# from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin # from . import StableDiffusionPipelineOutput # from .safety_checker import StableDiffusionSafetyChecker -from diffusers import AutoencoderKL, DiffusionPipeline, EfficiencyMixin, UNet2DConditionModel +from diffusers import AutoencoderKL, DiffusionPipeline, LatentDiffusionMixin, UNet2DConditionModel from diffusers.configuration_utils import ConfigMixin, FrozenDict from diffusers.models.modeling_utils import ModelMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -66,7 +66,7 @@ def forward(self, x): return self.projection(x) -class Zero1to3StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): +class Zero1to3StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for single view conditioned novel view generation using Zero1to3. diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py index 8928eb383b76..3ca36872830e 100755 --- a/examples/community/sd_text2img_k_diffusion.py +++ b/examples/community/sd_text2img_k_diffusion.py @@ -19,7 +19,7 @@ import torch from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser -from diffusers import DiffusionPipeline, EfficiencyMixin, LMSDiscreteScheduler +from diffusers import DiffusionPipeline, LatentDiffusionMixin, LMSDiscreteScheduler from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import logging @@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs): return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample -class StableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py index c84c222f2360..f5e519e0f23a 100644 --- a/examples/community/seed_resize_stable_diffusion.py +++ b/examples/community/seed_resize_stable_diffusion.py @@ -9,7 +9,7 @@ from diffusers import DiffusionPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -19,7 +19,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SeedResizeStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): +class SeedResizeStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. 
diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py index 7f0bf4bf0293..3633348a16df 100644 --- a/examples/community/speech_to_image_diffusion.py +++ b/examples/community/speech_to_image_diffusion.py @@ -18,7 +18,7 @@ PNDMScheduler, UNet2DConditionModel, ) -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import logging @@ -27,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SpeechToImagePipeline(DiffusionPipeline, EfficiencyMixin): +class SpeechToImagePipeline(DiffusionPipeline, LatentDiffusionMixin): def __init__( self, speech_model: WhisperForConditionalGeneration, diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py index 49c9ffa86a61..3723aa01f541 100644 --- a/examples/community/stable_diffusion_comparison.py +++ b/examples/community/stable_diffusion_comparison.py @@ -12,7 +12,7 @@ StableDiffusionPipeline, UNet2DConditionModel, ) -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -23,7 +23,7 @@ pipe4_model_id = "CompVis/stable-diffusion-v1-4" -class StableDiffusionComparisonPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionComparisonPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for parallel comparison of Stable Diffusion v1-v4 This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py index 48048d5831f4..fba1f145baa6 100644 --- a/examples/community/stable_diffusion_ipex.py +++ b/examples/community/stable_diffusion_ipex.py @@ -23,7 +23,7 @@ from diffusers.configuration_utils import FrozenDict from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -60,7 +60,9 @@ """ -class StableDiffusionIPEXPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionIPEXPipeline( + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): r""" Pipeline for text-to-image generation using Stable Diffusion on IPEX. 
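As the mixin documents, `fuse_qkv_projections` raises a ValueError for any VAE that is not an `AutoencoderKL`. A pipeline whose VAE falls outside that check can still fuse the UNet alone; a hedged sketch (the checkpoint name is only an example, and a stock Stable Diffusion VAE would in fact pass the check):

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

# Fuse only the UNet projections; skipping the VAE avoids the AutoencoderKL
# type check on fuse (and the corresponding warning on unfuse).
pipe.fuse_qkv_projections(unet=True, vae=False)
image = pipe("an astronaut riding a horse", num_inference_steps=20).images[0]
pipe.unfuse_qkv_projections(unet=True, vae=False)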
diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py index 470cbab5a527..3bdaa1d8dd58 100644 --- a/examples/community/stable_diffusion_mega.py +++ b/examples/community/stable_diffusion_mega.py @@ -16,7 +16,7 @@ UNet2DConditionModel, ) from diffusers.configuration_utils import FrozenDict -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import deprecate, logging @@ -24,7 +24,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionMegaPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionMegaPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py index 38400b578f97..5ee194ab80c8 100644 --- a/examples/community/stable_diffusion_repaint.py +++ b/examples/community/stable_diffusion_repaint.py @@ -24,7 +24,7 @@ from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel from diffusers.configuration_utils import FrozenDict, deprecate from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import ( StableDiffusionSafetyChecker, @@ -139,7 +139,9 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class StableDiffusionRepaintPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableDiffusionRepaintPipeline( + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py index 80889d7897bd..4276de3f92d4 100644 --- a/examples/community/text_inpainting.py +++ b/examples/community/text_inpainting.py @@ -13,7 +13,7 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -23,7 +23,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class TextInpainting(DiffusionPipeline, EfficiencyMixin): +class TextInpainting(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text based inpainting using Stable Diffusion. 
Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py index 63461fadfc14..6937bf72b86b 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py @@ -26,7 +26,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -44,7 +44,7 @@ class StableDiffusionControlNetXSPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance. diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py index bb2e6ad1dff7..50bc6803089e 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py @@ -31,7 +31,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -53,7 +53,7 @@ class StableDiffusionXLControlNetXSPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py index dbc98d028fad..7a5d7cdb5b47 100644 --- a/examples/research_projects/rdm/pipeline_rdm.py +++ b/examples/research_projects/rdm/pipeline_rdm.py @@ -19,7 +19,7 @@ UNet2DConditionModel, ) from diffusers.image_processor import VaeImageProcessor -from diffusers.pipelines.pipeline_utils import EfficiencyMixin +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.utils import logging from diffusers.utils.torch_utils import randn_tensor @@ -27,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class RDMPipeline(DiffusionPipeline, EfficiencyMixin): +class RDMPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Retrieval Augmented Diffusion. 
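The next hunks keep the renamed mixin re-exported from the package root, so both import paths remain valid after this patch. A quick sanity check (illustrative):

from diffusers import LatentDiffusionMixin
from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin as _LatentDiffusionMixin

# The top-level name is a re-export of the class in pipeline_utils.
assert LatentDiffusionMixin is _LatentDiffusionMixin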
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 5e276845a85b..dbe3394b0438 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -121,9 +121,9 @@ "DDPMPipeline", "DiffusionPipeline", "DiTPipeline", - "EfficiencyMixin", "ImagePipelineOutput", "KarrasVePipeline", + "LatentDiffusionMixin", "LDMPipeline", "LDMSuperResolutionPipeline", "PNDMPipeline", @@ -506,9 +506,9 @@ DDPMPipeline, DiffusionPipeline, DiTPipeline, - EfficiencyMixin, ImagePipelineOutput, KarrasVePipeline, + LatentDiffusionMixin, LDMPipeline, LDMSuperResolutionPipeline, PNDMPipeline, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index 8ccade3aa228..af557aee6fb9 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -48,7 +48,7 @@ _import_structure["pipeline_utils"] = [ "AudioPipelineOutput", "DiffusionPipeline", - "EfficiencyMixin", + "LatentDiffusionMixin", "ImagePipelineOutput", ] _import_structure["deprecated"].extend( @@ -329,8 +329,8 @@ from .pipeline_utils import ( AudioPipelineOutput, DiffusionPipeline, - EfficiencyMixin, ImagePipelineOutput, + LatentDiffusionMixin, ) try: diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index 050475a1ad26..11769c30f3f8 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -42,7 +42,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -87,7 +87,12 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: class AnimateDiffPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin + DiffusionPipeline, + LatentDiffusionMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FreeInitMixin, ): r""" Pipeline for text-to-video generation. diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 599f0497da97..4ce69450ae9e 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -35,7 +35,7 @@ from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -165,7 +165,12 @@ def retrieve_timesteps( class AnimateDiffVideoToVideoPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, FreeInitMixin + DiffusionPipeline, + LatentDiffusionMixin, + TextualInversionLoaderMixin, + IPAdapterMixin, + LoraLoaderMixin, + FreeInitMixin, ): r""" Pipeline for video-to-video generation. 
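Because every pipeline updated above now shares one implementation, the memory switches behave identically across them. A rough way to observe their effect (illustrative, not part of the patch; assumes a CUDA device and an example Stable Diffusion checkpoint):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

def peak_gib(prompt: str) -> float:
    # Measure peak allocated CUDA memory for one generation.
    torch.cuda.reset_peak_memory_stats()
    pipe(prompt, num_inference_steps=20)
    return torch.cuda.max_memory_allocated() / 2**30

baseline = peak_gib("a photo of a cat")
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
reduced = peak_gib("a photo of a cat")
print(f"peak memory: {baseline:.2f} GiB -> {reduced:.2f} GiB")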
diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py index 96cd554c112a..c1661839cc6d 100644 --- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py +++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py @@ -24,7 +24,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import logging, replace_example_docstring from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -49,7 +49,7 @@ """ -class AudioLDMPipeline(DiffusionPipeline, EfficiencyMixin): +class AudioLDMPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-audio generation using AudioLDM. diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index 64f93e3eefea..27118d30d3c8 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -173,7 +173,7 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.enable_vae_slicing + # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.enable_vae_slicing def enable_vae_slicing(self): r""" Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to @@ -181,7 +181,7 @@ def enable_vae_slicing(self): """ self.vae.enable_slicing() - # Copied from diffusers.pipelines.pipeline_utils.EfficiencyMixin.disable_vae_slicing + # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.disable_vae_slicing def disable_vae_slicing(self): r""" Disable sliced VAE decoding. 
If `enable_vae_slicing` was previously enabled, this method will go back to diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index 31b9107c7d2e..e65df8b78143 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -138,7 +138,7 @@ def retrieve_timesteps( class StableDiffusionControlNetPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index a3cc0cf3108a..94fc3f8b646a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -131,7 +131,7 @@ def prepare_image(image): class StableDiffusionControlNetImg2ImgPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index fcce6f88a3e6..7d456793e293 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -242,7 +242,7 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False class StableDiffusionControlNetInpaintPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 212fba1089ca..ee385d1d08ea 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -42,7 +42,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import 
DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from .multicontrolnet import MultiControlNetModel @@ -140,7 +140,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetInpaintPipeline( - DiffusionPipeline, EfficiencyMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 02c5e3092696..24d534af9353 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -55,7 +55,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -116,7 +116,7 @@ class StableDiffusionXLControlNetPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 545285f4d5a9..e07326826c1e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -54,7 +54,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -157,7 +157,11 @@ def retrieve_latents( class StableDiffusionXLControlNetImg2ImgPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, + LatentDiffusionMixin, + TextualInversionLoaderMixin, + StableDiffusionXLLoraLoaderMixin, + IPAdapterMixin, ): r""" Pipeline for image-to-image generation using Stable Diffusion XL with ControlNet guidance. 
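The helpers these pipelines inherit are driven from user code rather than from inside the pipeline call. A minimal usage sketch for the SDXL ControlNet image-to-image case above (the checkpoint IDs are illustrative examples, not part of this patch):

```python
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline

# Illustrative checkpoints; any SDXL base plus SDXL ControlNet pair behaves the same.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")

# Both calls resolve on the mixin being renamed in this patch:
pipe.enable_vae_slicing()  # decode batched latents one image at a time
pipe.enable_vae_tiling()   # decode/encode large images tile by tile
```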
diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py index 47c9cfdf19f7..c15f3fac4972 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -120,7 +120,7 @@ def retrieve_timesteps( class AltDiffusionPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py index 14f65f0034a2..9f1e9c3fdd6c 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -160,7 +160,7 @@ def retrieve_timesteps( class AltDiffusionImg2ImgPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index 4d3415ca8139..f1b1c83ff279 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -26,7 +26,7 @@ from ....schedulers.scheduling_utils import SchedulerMixin from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -37,7 +37,7 @@ class StableDiffusionModelEditingPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image model editing. 
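One knock-on effect of moving these helpers, visible in the AudioLDM2 hunk near the top of this patch: every `# Copied from` marker that names the old location has to be updated as well, since diffusers' `make fix-copies` check compares each marked method against the source path in its comment, and a stale path breaks the check. The marker is only a structured comment above a duplicated method, roughly:

```python
class SomeAudioPipeline:  # trimmed illustration, not the real AudioLDM2 class body
    # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.enable_vae_slicing
    def enable_vae_slicing(self):
        # Kept byte-for-byte identical to the mixin method it was copied from.
        self.vae.enable_slicing()
```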
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py index 88b9498c9ab0..4c42bb0d2a7d 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py @@ -32,7 +32,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -63,7 +63,7 @@ class StableDiffusionParadigmsPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a parallelized version of Stable Diffusion. diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index ac1f44a2f348..732b1bf70b47 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -31,7 +31,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -105,7 +105,7 @@ class I2VGenXLPipelineOutput(BaseOutput): class I2VGenXLPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, ): r""" Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/). 
diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 744ca3d19675..b620dce8129e 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -130,7 +130,7 @@ def retrieve_timesteps( class LatentConsistencyModelImg2ImgPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 395e4575942d..62ca96079cae 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -108,7 +108,7 @@ def retrieve_timesteps( class LatentConsistencyModelPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py index 7c9617a3e572..8fd728994a44 100644 --- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py +++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py @@ -36,7 +36,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin if is_librosa_available(): @@ -64,7 +64,7 @@ """ -class MusicLDMPipeline(DiffusionPipeline, EfficiencyMixin): +class MusicLDMPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-audio generation using MusicLDM. 
diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py index 85c533acaddf..b32e64d2bdc5 100644 --- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py @@ -25,7 +25,7 @@ from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .image_encoder import PaintByExampleImageEncoder @@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class PaintByExamplePipeline(DiffusionPipeline, EfficiencyMixin): +class PaintByExamplePipeline(DiffusionPipeline, LatentDiffusionMixin): r"""
diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 90468b0a2127..b60dd62140af 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -46,7 +46,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -212,7 +212,7 @@ class PIAPipelineOutput(BaseOutput): class PIAPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin,
diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 36fe8d779d27..3a4b22064be2 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -2098,9 +2098,9 @@ def set_attention_slice(self, slice_size: Optional[int]): module.set_attention_slice(slice_size) -class EfficiencyMixin: +class LatentDiffusionMixin: r""" - Helper for DiffusionPipeline with vae and unet.(mainly for stable diffusion) + Helper for a DiffusionPipeline with vae and unet (mainly for latent diffusion models such as Stable Diffusion). """ def enable_vae_slicing(self):
diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py index 6513adf5f67d..9e07a1fa1c8e 100644 --- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py @@ -11,14 +11,14 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import SemanticStableDiffusionPipelineOutput logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SemanticStableDiffusionPipeline(DiffusionPipeline, EfficiencyMixin): +class SemanticStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with latent
editing. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 860dfeeb85af..c7abaeebe177 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -116,7 +116,7 @@ def retrieve_timesteps( class StableDiffusionPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index 687dab69455d..1e91063a1228 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -34,7 +34,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionImageVariationPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionImageVariationPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline to generate image variations from an input image using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 6df7d62d7c9b..904e3c8c2c2a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -156,7 +156,7 @@ def retrieve_timesteps( class StableDiffusionImg2ImgPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 6652aecdcb5f..1ea5fa6a0670 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -29,7 +29,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -220,7 +220,7 @@ def retrieve_timesteps( class StableDiffusionInpaintPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 6a522cafd19f..2d8ed42935a3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import PIL_INTERPOLATION, deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -73,7 +73,7 @@ def retrieve_latents( class StableDiffusionInstructPix2PixPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion). 
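Every class statement in this series changes the same way: the pipeline lists the mixin among its bases and picks up the helpers through Python's method resolution order, which works because the mixin keeps no state of its own and only assumes `self.vae` and `self.unet` exist. A stripped-down sketch of the pattern (the names here are illustrative, not the diffusers implementation):

```python
class VaeHelpersMixin:
    """Stand-in for the renamed mixin: its methods just assume `self.vae` exists."""

    def enable_vae_slicing(self):
        self.vae.enable_slicing()


class ToyVae:
    def __init__(self):
        self.use_slicing = False

    def enable_slicing(self):
        self.use_slicing = True


class ToyPipeline(VaeHelpersMixin):
    def __init__(self):
        self.vae = ToyVae()


pipe = ToyPipeline()
pipe.enable_vae_slicing()    # found on the mixin via the MRO
assert pipe.vae.use_slicing  # and it acted on the pipeline's own VAE
```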
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py index 2712b17901a3..6ad27084f8a8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py @@ -27,7 +27,7 @@ from ...schedulers import EulerDiscreteScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -60,7 +60,7 @@ def preprocess(image): return image -class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, EfficiencyMixin, FromSingleFileMixin): +class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin): r""" Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index be26d67322bc..6b0be8b5a8c1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -34,7 +34,7 @@ from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import StableDiffusionPipelineOutput @@ -68,7 +68,7 @@ def preprocess(image): class StableDiffusionUpscalePipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-guided image super-resolution using Stable Diffusion 2. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 23d9e382ac01..821dbd2b6318 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -58,7 +58,7 @@ """ -class StableUnCLIPPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableUnCLIPPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): """ Pipeline for text-to-image generation using stable unCLIP. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 0a3a3c56c6f4..fd1403c8f8f6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin, ImagePipelineOutput +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -69,7 +69,9 @@ """ -class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableUnCLIPImg2ImgPipeline( + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin +): """ Pipeline for text-guided image-to-image generation using stable unCLIP. diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index b267d88e67e0..84796d936bdc 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a return hidden_states -class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin): +class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin): r""" Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite. 
diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py index ee6b5a0ac739..ad136b2b3993 100644 --- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py @@ -39,7 +39,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -236,7 +236,7 @@ def preprocess_mask(mask, batch_size: int = 1): class StableDiffusionDiffEditPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py index 76c2c23a3f2b..99b5fc35cf1a 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -99,7 +99,7 @@ """ -class StableDiffusionGLIGENPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionGLIGENPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py index 404f681c3a32..77ccbdf55ca8 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py @@ -34,7 +34,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.clip_image_project_model import CLIPImageProjection from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -145,7 +145,7 @@ """ -class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). 
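The same mixin also carries the FreeU toggle that the GLIGEN pipelines above now inherit. A usage sketch; the scaling factors are the values commonly quoted for Stable Diffusion v1.5 in the FreeU repository and are a starting point rather than anything this patch prescribes:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# b1/b2 amplify backbone features, s1/s2 attenuate the skip connections.
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.5, b2=1.6)
image = pipe("an astronaut riding a horse on the moon").images[0]

pipe.disable_freeu()  # restore the vanilla UNet forward pass
```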
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index ea1beb6788c3..5a3e570cbdaf 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -26,7 +26,7 @@ from ...schedulers import LMSDiscreteScheduler from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput @@ -48,7 +48,7 @@ def apply_model(self, *args, **kwargs): class StableDiffusionKDiffusionPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py index 2742b8797c8f..6925e4eeb1a4 100644 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py @@ -50,7 +50,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -91,7 +91,7 @@ def apply_model(self, *args, **kwargs): class StableDiffusionXLKDiffusionPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index 8f517e3d035c..a456ea747411 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -83,7 +83,7 @@ class LDM3DPipelineOutput(BaseOutput): class StableDiffusionLDM3DPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 29cde4ef328b..22f4ae29c991 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -32,7 +32,7 @@ 
unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -60,7 +60,7 @@ class StableDiffusionPanoramaPipeline( - DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for text-to-image generation using MultiDiffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py index 936cd5964666..edd3df1506ca 100644 --- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py @@ -14,7 +14,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import StableDiffusionSafePipelineOutput from .safety_checker import SafeStableDiffusionSafetyChecker @@ -22,7 +22,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionPipelineSafe(DiffusionPipeline, EfficiencyMixin, IPAdapterMixin): +class StableDiffusionPipelineSafe(DiffusionPipeline, LatentDiffusionMixin, IPAdapterMixin): r""" Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py index 95a4215e6710..8c44849840f9 100644 --- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -98,7 +98,7 @@ def __call__( # Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input -class StableDiffusionSAGPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin): +class StableDiffusionSAGPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. 
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 508c09a42fbb..e7d1d28072f6 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -52,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -148,7 +148,7 @@ def retrieve_timesteps( class StableDiffusionXLPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 8c71019b7647..a1034caf4398 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -52,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -165,7 +165,7 @@ def retrieve_timesteps( class StableDiffusionXLImg2ImgPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index f45ebd273f52..43b397fb18a7 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -53,7 +53,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -310,7 +310,7 @@ def retrieve_timesteps( class StableDiffusionXLInpaintPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py index 280e75035f1e..de11d8d8749f 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py @@ -41,7 +41,7 @@ scale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -119,7 +119,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class 
StableDiffusionXLInstructPix2PixPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 271082df4c4e..a6575886594b 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -163,7 +163,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class StableDiffusionAdapterPipeline(DiffusionPipeline, EfficiencyMixin): +class StableDiffusionAdapterPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter https://arxiv.org/abs/2302.08453 diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 1b15f1ec3107..16dd4180c2c2 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -51,7 +51,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -181,7 +181,7 @@ def retrieve_timesteps( class StableDiffusionXLAdapterPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index db0c8e54fb6f..005984f8605a 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import TextToVideoSDPipelineOutput @@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: return outputs -class TextToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-to-video generation. 
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index dcc7c6a7df0f..8ac6507ee717 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from . import TextToVideoSDPipelineOutput @@ -157,7 +157,7 @@ def preprocess_video(video): return video -class VideoToVideoSDPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class VideoToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided video-to-video generation. diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py index 991dfeee0a55..76da107e8967 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py @@ -17,7 +17,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from ..stable_diffusion import StableDiffusionSafetyChecker @@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s return warped_latents -class TextToVideoZeroPipeline(DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoZeroPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for zero-shot text-to-video generation using Stable Diffusion. 
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py index afd81d04f3fe..c659202838b2 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin if is_invisible_watermark_available(): @@ -327,7 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class TextToVideoZeroSDXLPipeline( DiffusionPipeline, - EfficiencyMixin, + LatentDiffusionMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, ): diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index cad7cb381e64..2c09bb0fad5f 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -21,7 +21,7 @@ from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.outputs import BaseOutput from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, EfficiencyMixin +from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin from .modeling_text_decoder import UniDiffuserTextDecoder from .modeling_uvit import UniDiffuserModel @@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput): text: Optional[Union[List[str], List[List[str]]]] -class UniDiffuserPipeline(DiffusionPipeline, EfficiencyMixin): +class UniDiffuserPipeline(DiffusionPipeline, LatentDiffusionMixin): r""" Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned image generation, image-conditioned text generation, and joint image-text generation. 
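The rename also reaches the torch-less placeholders in the next hunk. `dummy_pt_objects.py` keeps every public name importable when `torch` is missing and only raises once the object is actually used. A minimal sketch of that pattern, assuming a simplified metaclass in place of the real `DummyObject`/`requires_backends` pair:

```python
class DummyObject(type):
    """Metaclass for import-time placeholders that fail loudly on first use."""

    def __call__(cls, *args, **kwargs):
        raise ImportError(f"{cls.__name__} requires the backends {cls._backends}")


class LatentDiffusionMixin(metaclass=DummyObject):
    _backends = ["torch"]


try:
    LatentDiffusionMixin()
except ImportError as err:
    print(err)  # LatentDiffusionMixin requires the backends ['torch']
```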
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 19f5ac445a9e..a87b27d2c479 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -570,7 +570,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class EfficiencyMixin(metaclass=DummyObject): +class LatentDiffusionMixin(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs):
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py index a11ca7b4a233..89320eb2b936 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -53,7 +53,7 @@ TEXT_TO_IMAGE_PARAMS, ) from ..test_pipelines_common import ( - PipelineEfficiencyFunctionTesterMixin, + LDMFunctionTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, ) @@ -64,7 +64,7 @@ class StableDiffusion2PipelineFastTests( - PipelineEfficiencyFunctionTesterMixin, + LDMFunctionTesterMixin, PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin,
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py index b9e01f598cde..b9327b9d3ce1 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py @@ -50,7 +50,7 @@ TEXT_TO_IMAGE_PARAMS, ) from ..test_pipelines_common import ( - PipelineEfficiencyFunctionTesterMixin, + LDMFunctionTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, ) @@ -61,7 +61,7 @@ class StableDiffusionXLPipelineFastTests( - PipelineEfficiencyFunctionTesterMixin, + LDMFunctionTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin,
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 58929296e34d..173fdb54c2ba 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -59,10 +59,10 @@ def check_same_shape(tensor_list): return all(shape == shapes[0] for shape in shapes[1:]) -class PipelineEfficiencyFunctionTesterMixin: +class LDMFunctionTesterMixin: """ This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes. - It provides a set of common tests for PyTorch pipeline that inherit from EfficiencyMixin, e.g. vae_slicing, vae_tiling, freeu, etc. + It provides a set of common tests for PyTorch pipelines that inherit from LatentDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc. """ def test_vae_slicing(self):
From 6c11d6a852452777f43bacf39bd829c2ad299732 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Wed, 21 Feb 2024 10:58:22 +0800 Subject: [PATCH 07/17] add LDM_component test for pipelines with LatentDiffusionMixin --- tests/pipelines/test_pipelines_common.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 173fdb54c2ba..f14b4e98eae7 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -29,6 +29,7 @@ UNet2DConditionModel, ) from diffusers.image_processor import VaeImageProcessor +from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import logging from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available @@ -1152,6 +1153,14 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs): # accounts for models that modify the number of inference steps based on strength assert pipe.guidance_scale == (inputs["guidance_scale"] + pipe.num_timesteps) + def test_LDM_component(self): + """Any pipeline that inherits from LatentDiffusionMixin should have vae and unet components.""" + if not issubclass(self.pipeline_class, LatentDiffusionMixin): + return + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + self.assertTrue(hasattr(pipe, "vae")) + self.assertTrue(hasattr(pipe, "unet")) @is_staging_test class PipelinePushToHubTester(unittest.TestCase):
From 4602bac99b7c40f8871457616a0ed012282b44a8 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Thu, 22 Feb 2024 15:42:00 +0800 Subject: [PATCH 08/17] rename LatentDiffusionMixin to StableDiffusionMixin --- .../community/composable_stable_diffusion.py | 4 ++-- examples/community/gluegen.py | 4 ++-- examples/community/ip_adapter_face_id.py | 4 ++-- .../community/latent_consistency_interpolate.py | 4 ++-- examples/community/llm_grounded_diffusion.py | 4 ++-- examples/community/lpw_stable_diffusion.py | 4 ++-- examples/community/lpw_stable_diffusion_xl.py | 4 ++-- .../community/pipeline_animatediff_controlnet.py | 4 ++-- .../community/pipeline_animatediff_img2video.py | 4 ++-- examples/community/pipeline_demofusion_sdxl.py | 4 ++-- .../community/pipeline_sdxl_style_aligned.py | 6 +++--- ...ine_stable_diffusion_xl_controlnet_adapter.py | 4 ++-- ...le_diffusion_xl_controlnet_adapter_inpaint.py | 4 ++-- examples/community/pipeline_zero1to3.py | 6 +++--- examples/community/sd_text2img_k_diffusion.py | 4 ++-- .../community/seed_resize_stable_diffusion.py | 4 ++-- examples/community/speech_to_image_diffusion.py | 4 ++-- .../community/stable_diffusion_comparison.py | 4 ++-- examples/community/stable_diffusion_ipex.py | 4 ++-- examples/community/stable_diffusion_mega.py | 4 ++-- examples/community/stable_diffusion_repaint.py | 4 ++-- examples/community/text_inpainting.py | 4 ++-- .../controlnetxs/pipeline_controlnet_xs.py | 4 ++-- .../controlnetxs/pipeline_controlnet_xs_sd_xl.py | 4 ++-- examples/research_projects/rdm/pipeline_rdm.py | 4 ++-- src/diffusers/__init__.py | 4 ++-- src/diffusers/pipelines/__init__.py | 4 ++-- .../animatediff/pipeline_animatediff.py | 4 ++-- .../pipeline_animatediff_video2video.py | 4 ++-- .../pipelines/audioldm/pipeline_audioldm.py | 4 ++-- .../pipelines/audioldm2/pipeline_audioldm2.py | 4 ++-- .../pipelines/controlnet/pipeline_controlnet.py | 4 ++--
.../controlnet/pipeline_controlnet_img2img.py | 4 ++-- .../controlnet/pipeline_controlnet_inpaint.py | 4 ++-- .../pipeline_controlnet_inpaint_sd_xl.py | 4 ++-- .../controlnet/pipeline_controlnet_sd_xl.py | 4 ++-- .../pipeline_controlnet_sd_xl_img2img.py | 4 ++-- .../alt_diffusion/pipeline_alt_diffusion.py | 4 ++-- .../pipeline_alt_diffusion_img2img.py | 4 ++-- .../pipeline_stable_diffusion_model_editing.py | 4 ++-- .../pipeline_stable_diffusion_paradigms.py | 4 ++-- .../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 4 ++-- .../pipeline_latent_consistency_img2img.py | 4 ++-- .../pipeline_latent_consistency_text2img.py | 4 ++-- .../pipelines/musicldm/pipeline_musicldm.py | 4 ++-- .../pipeline_paint_by_example.py | 4 ++-- src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++-- src/diffusers/pipelines/pipeline_utils.py | 2 +- .../pipeline_semantic_stable_diffusion.py | 4 ++-- .../pipeline_stable_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_image_variation.py | 4 ++-- .../pipeline_stable_diffusion_img2img.py | 4 ++-- .../pipeline_stable_diffusion_inpaint.py | 4 ++-- ...pipeline_stable_diffusion_instruct_pix2pix.py | 4 ++-- .../pipeline_stable_diffusion_latent_upscale.py | 4 ++-- .../pipeline_stable_diffusion_upscale.py | 4 ++-- .../stable_diffusion/pipeline_stable_unclip.py | 4 ++-- .../pipeline_stable_unclip_img2img.py | 4 ++-- ...ipeline_stable_diffusion_attend_and_excite.py | 4 ++-- .../pipeline_stable_diffusion_diffedit.py | 4 ++-- .../pipeline_stable_diffusion_gligen.py | 4 ++-- ...ipeline_stable_diffusion_gligen_text_image.py | 4 ++-- .../pipeline_stable_diffusion_k_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_xl_k_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_ldm3d.py | 4 ++-- .../pipeline_stable_diffusion_panorama.py | 4 ++-- .../pipeline_stable_diffusion_safe.py | 4 ++-- .../pipeline_stable_diffusion_sag.py | 4 ++-- .../pipeline_stable_diffusion_xl.py | 4 ++-- .../pipeline_stable_diffusion_xl_img2img.py | 4 ++-- .../pipeline_stable_diffusion_xl_inpaint.py | 4 ++-- ...eline_stable_diffusion_xl_instruct_pix2pix.py | 4 ++-- .../pipeline_stable_diffusion_adapter.py | 4 ++-- .../pipeline_stable_diffusion_xl_adapter.py | 4 ++-- .../pipeline_text_to_video_synth.py | 4 ++-- .../pipeline_text_to_video_synth_img2img.py | 4 ++-- .../pipeline_text_to_video_zero.py | 4 ++-- .../pipeline_text_to_video_zero_sdxl.py | 4 ++-- .../unidiffuser/pipeline_unidiffuser.py | 4 ++-- src/diffusers/utils/dummy_pt_objects.py | 16 ++++++++-------- .../stable_diffusion_2/test_stable_diffusion.py | 4 ++-- .../test_stable_diffusion_xl.py | 4 ++-- tests/pipelines/test_pipelines_common.py | 12 +++++++----- 83 files changed, 178 insertions(+), 176 deletions(-) diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index c7b91f94f294..3153bd30e479 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -22,7 +22,7 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import ( @@ -39,7 +39,7 @@ logger = 
logging.get_logger(__name__) # pylint: disable=invalid-name -class ComposableStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): +class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/gluegen.py b/examples/community/gluegen.py index f0ace91d683c..b8f147000229 100644 --- a/examples/community/gluegen.py +++ b/examples/community/gluegen.py @@ -10,7 +10,7 @@ from diffusers.loaders import LoraLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -194,7 +194,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class GlueGenStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin, LoraLoaderMixin): +class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, LoraLoaderMixin): def __init__( self, vae: AutoencoderKL, diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py index d1fa98bc9df9..b4d2446b5ce9 100644 --- a/examples/community/ip_adapter_face_id.py +++ b/examples/community/ip_adapter_face_id.py @@ -27,7 +27,7 @@ from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -415,7 +415,7 @@ def retrieve_timesteps( class IPAdapterFaceIDStableDiffusionPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/examples/community/latent_consistency_interpolate.py b/examples/community/latent_consistency_interpolate.py index 44a7c8bec07a..0c14a55bd30f 100644 --- a/examples/community/latent_consistency_interpolate.py +++ b/examples/community/latent_consistency_interpolate.py @@ -9,7 +9,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import LCMScheduler from diffusers.utils import ( @@ -190,7 +190,7 @@ def slerp( class LatentConsistencyModelWalkPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, 
LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a latent consistency model. diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py index 7f7b0fad39f8..5db144a9a23a 100644 --- a/examples/community/llm_grounded_diffusion.py +++ b/examples/community/llm_grounded_diffusion.py @@ -35,7 +35,7 @@ from diffusers.models.attention_processor import AttnProcessor2_0 from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines import DiffusionPipeline -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -269,7 +269,7 @@ def __call__( class LLMGroundedDiffusionPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index 819787ee9a02..78d93bfb7081 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -13,7 +13,7 @@ from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -409,7 +409,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8): class StableDiffusionLongPromptWeightingPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion without tokens length limit, and support parsing diff --git a/examples/community/lpw_stable_diffusion_xl.py b/examples/community/lpw_stable_diffusion_xl.py index c5c93d9ea381..b56adeeea87a 100644 --- a/examples/community/lpw_stable_diffusion_xl.py +++ b/examples/community/lpw_stable_diffusion_xl.py @@ -30,7 +30,7 @@ LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -546,7 +546,7 @@ def retrieve_timesteps( class SDXLLongPromptWeightingPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index a1d6c5605102..2d00b40cfcc5 100644 --- 
a/examples/community/pipeline_animatediff_controlnet.py +++ b/examples/community/pipeline_animatediff_controlnet.py @@ -28,7 +28,7 @@ from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unets.unet_motion_model import MotionAdapter from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -112,7 +112,7 @@ class AnimateDiffControlNetPipelineOutput(BaseOutput): class AnimateDiffControlNetPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin ): r""" Pipeline for text-to-video generation. diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index d691ece93050..35f7909bf15f 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -26,7 +26,7 @@ from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unet_motion_model import MotionAdapter -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.schedulers import ( DDIMScheduler, DPMSolverMultistepScheduler, @@ -231,7 +231,7 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput): class AnimateDiffImgToVideoPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin ): r""" Pipeline for text-to-video generation. diff --git a/examples/community/pipeline_demofusion_sdxl.py b/examples/community/pipeline_demofusion_sdxl.py index e7dc269a1e71..e29678b55922 100644 --- a/examples/community/pipeline_demofusion_sdxl.py +++ b/examples/community/pipeline_demofusion_sdxl.py @@ -23,7 +23,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( is_accelerate_available, @@ -94,7 +94,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class DemoFusionSDXLPipeline( - DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin + DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. 
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py index b547c35f1123..cc7804fe4237 100644 --- a/examples/community/pipeline_sdxl_style_aligned.py +++ b/examples/community/pipeline_sdxl_style_aligned.py @@ -51,7 +51,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -85,7 +85,7 @@ >>> from typing import List >>> import torch - >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,LatentDiffusionMixin + >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,StableDiffusionMixin >>> from PIL import Image >>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo" @@ -389,7 +389,7 @@ def retrieve_latents( class StyleAlignedSDXLPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py index 49a46e9ba4be..fe94646a4436 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py @@ -33,7 +33,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -159,7 +159,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetAdapterPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py index 5347ab949697..2eaa0a5e0d37 100644 --- a/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py +++ b/examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py @@ -52,7 +52,7 @@ ) from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -305,7 +305,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetAdapterInpaintPipeline( - DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin, 
LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py index 1656ae674382..133aa694c18c 100644 --- a/examples/community/pipeline_zero1to3.py +++ b/examples/community/pipeline_zero1to3.py @@ -22,10 +22,10 @@ # randn_tensor, # replace_example_docstring, # ) -# from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +# from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin # from . import StableDiffusionPipelineOutput # from .safety_checker import StableDiffusionSafetyChecker -from diffusers import AutoencoderKL, DiffusionPipeline, LatentDiffusionMixin, UNet2DConditionModel +from diffusers import AutoencoderKL, DiffusionPipeline, StableDiffusionMixin, UNet2DConditionModel from diffusers.configuration_utils import ConfigMixin, FrozenDict from diffusers.models.modeling_utils import ModelMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -66,7 +66,7 @@ def forward(self, x): return self.projection(x) -class Zero1to3StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): +class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for single view conditioned novel view generation using Zero1to3. diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py index 3ca36872830e..3299a7605257 100755 --- a/examples/community/sd_text2img_k_diffusion.py +++ b/examples/community/sd_text2img_k_diffusion.py @@ -19,7 +19,7 @@ import torch from k_diffusion.external import CompVisDenoiser, CompVisVDenoiser -from diffusers import DiffusionPipeline, LatentDiffusionMixin, LMSDiscreteScheduler +from diffusers import DiffusionPipeline, LMSDiscreteScheduler, StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import logging @@ -41,7 +41,7 @@ def apply_model(self, *args, **kwargs): return self.model(*args, encoder_hidden_states=encoder_hidden_states, **kwargs).sample -class StableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. 
diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py index f5e519e0f23a..20f972f049b3 100644 --- a/examples/community/seed_resize_stable_diffusion.py +++ b/examples/community/seed_resize_stable_diffusion.py @@ -9,7 +9,7 @@ from diffusers import DiffusionPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -19,7 +19,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SeedResizeStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): +class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py index 3633348a16df..3537ef89e1a1 100644 --- a/examples/community/speech_to_image_diffusion.py +++ b/examples/community/speech_to_image_diffusion.py @@ -18,7 +18,7 @@ PNDMScheduler, UNet2DConditionModel, ) -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import logging @@ -27,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SpeechToImagePipeline(DiffusionPipeline, LatentDiffusionMixin): +class SpeechToImagePipeline(DiffusionPipeline, StableDiffusionMixin): def __init__( self, speech_model: WhisperForConditionalGeneration, diff --git a/examples/community/stable_diffusion_comparison.py b/examples/community/stable_diffusion_comparison.py index 3723aa01f541..dab5705b3370 100644 --- a/examples/community/stable_diffusion_comparison.py +++ b/examples/community/stable_diffusion_comparison.py @@ -12,7 +12,7 @@ StableDiffusionPipeline, UNet2DConditionModel, ) -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -23,7 +23,7 @@ pipe4_model_id = "CompVis/stable-diffusion-v1-4" -class StableDiffusionComparisonPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for parallel comparison of Stable Diffusion v1-v4 This pipeline inherits from DiffusionPipeline and depends on the use of an Auth Token for diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py index fba1f145baa6..8e71f79e9ae4 100644 --- a/examples/community/stable_diffusion_ipex.py +++ b/examples/community/stable_diffusion_ipex.py @@ -23,7 +23,7 @@ from diffusers.configuration_utils import FrozenDict from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, 
UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -61,7 +61,7 @@ class StableDiffusionIPEXPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion on IPEX. diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py index 3bdaa1d8dd58..e53afb703e24 100644 --- a/examples/community/stable_diffusion_mega.py +++ b/examples/community/stable_diffusion_mega.py @@ -16,7 +16,7 @@ UNet2DConditionModel, ) from diffusers.configuration_utils import FrozenDict -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.utils import deprecate, logging @@ -24,7 +24,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionMegaPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py index 5ee194ab80c8..02bef293bba8 100644 --- a/examples/community/stable_diffusion_repaint.py +++ b/examples/community/stable_diffusion_repaint.py @@ -24,7 +24,7 @@ from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel from diffusers.configuration_utils import FrozenDict, deprecate from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import ( StableDiffusionSafetyChecker, @@ -140,7 +140,7 @@ def prepare_mask_and_masked_image(image, mask): class StableDiffusionRepaintPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. 
diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py index 4276de3f92d4..ea4da966bb71 100644 --- a/examples/community/text_inpainting.py +++ b/examples/community/text_inpainting.py @@ -13,7 +13,7 @@ from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionInpaintPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -23,7 +23,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class TextInpainting(DiffusionPipeline, LatentDiffusionMixin): +class TextInpainting(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text based inpainting using Stable Diffusion. Uses CLIPSeg to get a mask from the given text, then calls the Inpainting pipeline with the generated mask diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py index 6937bf72b86b..88a586e9271d 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs.py @@ -26,7 +26,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -44,7 +44,7 @@ class StableDiffusionControlNetXSPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance. 
diff --git a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py index 50bc6803089e..d0186573fa9c 100644 --- a/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py +++ b/examples/research_projects/controlnetxs/pipeline_controlnet_xs_sd_xl.py @@ -31,7 +31,7 @@ XFormersAttnProcessor, ) from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import ( @@ -53,7 +53,7 @@ class StableDiffusionXLControlNetXSPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, diff --git a/examples/research_projects/rdm/pipeline_rdm.py b/examples/research_projects/rdm/pipeline_rdm.py index 7a5d7cdb5b47..dd97bf71b9db 100644 --- a/examples/research_projects/rdm/pipeline_rdm.py +++ b/examples/research_projects/rdm/pipeline_rdm.py @@ -19,7 +19,7 @@ UNet2DConditionModel, ) from diffusers.image_processor import VaeImageProcessor -from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.utils import logging from diffusers.utils.torch_utils import randn_tensor @@ -27,7 +27,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class RDMPipeline(DiffusionPipeline, LatentDiffusionMixin): +class RDMPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Retrieval Augmented Diffusion. 
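All of the community and research-project pipelines above migrate in the same mechanical way: the import line and the base-class list swap LatentDiffusionMixin for StableDiffusionMixin. A minimal sketch of the change as it lands in downstream code (the pipeline class name below is hypothetical, not one of the files touched by this patch):

    # Hypothetical downstream pipeline; only the mixin name changes in this patch.
    # Before: from diffusers.pipelines.pipeline_utils import DiffusionPipeline, LatentDiffusionMixin
    from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin


    class MyCommunityPipeline(DiffusionPipeline, StableDiffusionMixin):
        """Inherits the VAE/UNet helpers (slicing, tiling, FreeU, QKV fusion) from the mixin."""
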
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index dbe3394b0438..cf07c841b448 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -123,12 +123,12 @@ "DiTPipeline", "ImagePipelineOutput", "KarrasVePipeline", - "LatentDiffusionMixin", "LDMPipeline", "LDMSuperResolutionPipeline", "PNDMPipeline", "RePaintPipeline", "ScoreSdeVePipeline", + "StableDiffusionMixin", ] ) _import_structure["schedulers"].extend( @@ -508,12 +508,12 @@ DiTPipeline, ImagePipelineOutput, KarrasVePipeline, - LatentDiffusionMixin, LDMPipeline, LDMSuperResolutionPipeline, PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline, + StableDiffusionMixin, ) from .schedulers import ( AmusedScheduler, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index af557aee6fb9..a1840201f8ba 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -48,7 +48,7 @@ _import_structure["pipeline_utils"] = [ "AudioPipelineOutput", "DiffusionPipeline", - "LatentDiffusionMixin", + "StableDiffusionMixin", "ImagePipelineOutput", ] _import_structure["deprecated"].extend( @@ -330,7 +330,7 @@ AudioPipelineOutput, DiffusionPipeline, ImagePipelineOutput, - LatentDiffusionMixin, + StableDiffusionMixin, ) try: diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index 11769c30f3f8..10e7175c2713 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -42,7 +42,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -88,7 +88,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: class AnimateDiffPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 4ce69450ae9e..bfa1785081a6 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -35,7 +35,7 @@ from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import AnimateDiffPipelineOutput @@ -166,7 +166,7 @@ def retrieve_timesteps( class AnimateDiffVideoToVideoPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py index c1661839cc6d..69bebdd0dc4f 100644 --- a/src/diffusers/pipelines/audioldm/pipeline_audioldm.py +++ b/src/diffusers/pipelines/audioldm/pipeline_audioldm.py @@ -24,7 +24,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import logging, replace_example_docstring from ...utils.torch_utils import randn_tensor 
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -49,7 +49,7 @@ """ -class AudioLDMPipeline(DiffusionPipeline, LatentDiffusionMixin): +class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-audio generation using AudioLDM. diff --git a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py index 27118d30d3c8..e01aa9929dd8 100644 --- a/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py +++ b/src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py @@ -173,7 +173,7 @@ def __init__( ) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) - # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.enable_vae_slicing + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_slicing def enable_vae_slicing(self): r""" Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to @@ -181,7 +181,7 @@ def enable_vae_slicing(self): """ self.vae.enable_slicing() - # Copied from diffusers.pipelines.pipeline_utils.LatentDiffusionMixin.disable_vae_slicing + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing def disable_vae_slicing(self): r""" Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index e65df8b78143..9f968daaa03c 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -138,7 +138,7 @@ def retrieve_timesteps( class StableDiffusionControlNetPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 94fc3f8b646a..304767107332 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -131,7 +131,7 @@ def prepare_image(image): class StableDiffusionControlNetImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, 
IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 7d456793e293..96c4245ba9e6 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .multicontrolnet import MultiControlNetModel @@ -242,7 +242,7 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image=False class StableDiffusionControlNetInpaintPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 4da8542392c6..bde3647c0c48 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -53,7 +53,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput from .multicontrolnet import MultiControlNetModel @@ -151,7 +151,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class StableDiffusionXLControlNetInpaintPipeline( - DiffusionPipeline, LatentDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, IPAdapterMixin + DiffusionPipeline, StableDiffusionMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, IPAdapterMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion XL. 
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 24d534af9353..4e0a880a4a11 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -55,7 +55,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -116,7 +116,7 @@ class StableDiffusionXLControlNetPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index e07326826c1e..41a8c4fa005e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -54,7 +54,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import is_compiled_module, randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -158,7 +158,7 @@ def retrieve_latents( class StableDiffusionXLControlNetImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py index c15f3fac4972..e4583699e79e 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -120,7 +120,7 @@ def retrieve_timesteps( class AltDiffusionPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py index 9f1e9c3fdd6c..156e52c249d9 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .modeling_roberta_series import 
RobertaSeriesModelWithTransformation from .pipeline_output import AltDiffusionPipelineOutput @@ -160,7 +160,7 @@ def retrieve_timesteps( class AltDiffusionImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py index f1b1c83ff279..dee93fc2eb53 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py @@ -26,7 +26,7 @@ from ....schedulers.scheduling_utils import SchedulerMixin from ....utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -37,7 +37,7 @@ class StableDiffusionModelEditingPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image model editing. diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py index 4c42bb0d2a7d..ddc866ef9b86 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py @@ -32,7 +32,7 @@ unscale_lora_layers, ) from ....utils.torch_utils import randn_tensor -from ...pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -63,7 +63,7 @@ class StableDiffusionParadigmsPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-to-image generation using a parallelized version of Stable Diffusion. 
diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 732b1bf70b47..2df21533962c 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -31,7 +31,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -105,7 +105,7 @@ class I2VGenXLPipelineOutput(BaseOutput): class I2VGenXLPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, ): r""" Pipeline for image-to-video generation as proposed in [I2VGenXL](https://i2vgen-xl.github.io/). diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index b620dce8129e..d2580b83c74a 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -130,7 +130,7 @@ def retrieve_timesteps( class LatentConsistencyModelImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 62ca96079cae..a383f346aacd 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -108,7 +108,7 @@ def retrieve_timesteps( class LatentConsistencyModelPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py index 8fd728994a44..5fde3450b9a0 100644 --- a/src/diffusers/pipelines/musicldm/pipeline_musicldm.py +++ b/src/diffusers/pipelines/musicldm/pipeline_musicldm.py @@ -36,7 +36,7 @@ replace_example_docstring, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin if is_librosa_available(): @@ -64,7 +64,7 @@ """ -class MusicLDMPipeline(DiffusionPipeline, LatentDiffusionMixin): +class MusicLDMPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-audio generation using 
MusicLDM. diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py index b32e64d2bdc5..8a24f134e793 100644 --- a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +++ b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py @@ -25,7 +25,7 @@ from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker from .image_encoder import PaintByExampleImageEncoder @@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask): return mask, masked_image -class PaintByExamplePipeline(DiffusionPipeline, LatentDiffusionMixin): +class PaintByExamplePipeline(DiffusionPipeline, StableDiffusionMixin): r""" diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index b60dd62140af..37c0ed142c51 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -46,7 +46,7 @@ ) from ...utils.torch_utils import randn_tensor from ..free_init_utils import FreeInitMixin -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -212,7 +212,7 @@ class PIAPipelineOutput(BaseOutput): class PIAPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 3a4b22064be2..d472687bb915 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -2098,7 +2098,7 @@ def set_attention_slice(self, slice_size: Optional[int]): module.set_attention_slice(slice_size) -class LatentDiffusionMixin: +class StableDiffusionMixin: r""" Helper for DiffusionPipeline with a VAE and a UNet (mainly for latent diffusion models such as Stable Diffusion). """ diff --git a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py index 9e07a1fa1c8e..f0e25264ffa7 100644 --- a/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +++ b/src/diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py @@ -11,14 +11,14 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import SemanticStableDiffusionPipelineOutput logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class SemanticStableDiffusionPipeline(DiffusionPipeline, LatentDiffusionMixin): +class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with latent editing.
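The class renamed in pipeline_utils.py above is the same VAE/UNet helper mixin introduced earlier in this series; only its name changes, its methods do not. A minimal usage sketch, assuming an SD v1.5 checkpoint (the model id and the FreeU factors are illustrative values, not part of this patch):

    import torch

    from diffusers import StableDiffusionPipeline

    # StableDiffusionPipeline inherits these helpers from StableDiffusionMixin.
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    )
    pipe.enable_vae_slicing()  # decode the VAE in slices to lower peak memory
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  # illustrative SD v1.x factors
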
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index c7abaeebe177..762565ea1fd3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -116,7 +116,7 @@ def retrieve_timesteps( class StableDiffusionPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index 1e91063a1228..1333cb825750 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -34,7 +34,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionImageVariationPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline to generate image variations from an input image using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 904e3c8c2c2a..e79a053b7662 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . 
import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -156,7 +156,7 @@ def retrieve_timesteps( class StableDiffusionImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 1ea5fa6a0670..62e289c7ba36 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -29,7 +29,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -220,7 +220,7 @@ def retrieve_timesteps( class StableDiffusionInpaintPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index 2d8ed42935a3..89d4278937fe 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -26,7 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import PIL_INTERPOLATION, deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -73,7 +73,7 @@ def retrieve_latents( class StableDiffusionInstructPix2PixPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion). 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py index 6ad27084f8a8..918dffe5199d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py @@ -27,7 +27,7 @@ from ...schedulers import EulerDiscreteScheduler from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -60,7 +60,7 @@ def preprocess(image): return image -class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, LatentDiffusionMixin, FromSingleFileMixin): +class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin): r""" Pipeline for upscaling Stable Diffusion output image resolution by a factor of 2. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 6b0be8b5a8c1..2d04cf41d9b5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -34,7 +34,7 @@ from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionPipelineOutput @@ -68,7 +68,7 @@ def preprocess(image): class StableDiffusionUpscalePipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): r""" Pipeline for text-guided image super-resolution using Stable Diffusion 2. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 821dbd2b6318..c62e0f4ec50f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -58,7 +58,7 @@ """ -class StableUnCLIPPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): """ Pipeline for text-to-image generation using stable unCLIP. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index fd1403c8f8f6..9b85d9e6b1a4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer @@ -70,7 +70,7 @@ class StableUnCLIPImg2ImgPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): """ Pipeline for text-guided image-to-image generation using stable unCLIP. diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index 84796d936bdc..03c80b46b806 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -170,7 +170,7 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a return hidden_states -class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin): +class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin): r""" Pipeline for text-to-image generation using Stable Diffusion and Attend-and-Excite. 
diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py index ad136b2b3993..4c90ce0646c4 100644 --- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py @@ -39,7 +39,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -236,7 +236,7 @@ def preprocess_mask(mask, batch_size: int = 1): class StableDiffusionDiffEditPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py index 99b5fc35cf1a..9f0d1190fd87 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py @@ -35,7 +35,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -99,7 +99,7 @@ """ -class StableDiffusionGLIGENPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py index 77ccbdf55ca8..bbffaf2884a3 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py @@ -34,7 +34,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.clip_image_project_model import CLIPImageProjection from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -145,7 +145,7 @@ """ -class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN). 
diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 5a3e570cbdaf..bc565c938a30 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -26,7 +26,7 @@ from ...schedulers import LMSDiscreteScheduler from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput @@ -48,7 +48,7 @@ def apply_model(self, *args, **kwargs): class StableDiffusionKDiffusionPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin ): r""" Pipeline for text-to-image generation using Stable Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py index 6925e4eeb1a4..ed46a1e36b60 100644 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py @@ -50,7 +50,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -91,7 +91,7 @@ def apply_model(self, *args, **kwargs): class StableDiffusionXLKDiffusionPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index a456ea747411..ddbf9ebbb1d0 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -36,7 +36,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -83,7 +83,7 @@ class LDM3DPipelineOutput(BaseOutput): class StableDiffusionLDM3DPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 22f4ae29c991..57ca56f9afd8 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ 
-32,7 +32,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -60,7 +60,7 @@ class StableDiffusionPanoramaPipeline( - DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin ): r""" Pipeline for text-to-image generation using MultiDiffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py index edd3df1506ca..24c648a813ba 100644 --- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py @@ -14,7 +14,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import deprecate, logging from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionSafePipelineOutput from .safety_checker import SafeStableDiffusionSafetyChecker @@ -22,7 +22,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionPipelineSafe(DiffusionPipeline, LatentDiffusionMixin, IPAdapterMixin): +class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin): r""" Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion. diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py index 8c44849840f9..878a3fdac211 100644 --- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionPipelineOutput from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -98,7 +98,7 @@ def __call__( # Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input -class StableDiffusionSAGPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin): +class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. 
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index e7d1d28072f6..5e95535ef50e 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -52,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -148,7 +148,7 @@ def retrieve_timesteps( class StableDiffusionXLPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index a1034caf4398..eb5d9c64538a 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -52,7 +52,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -165,7 +165,7 @@ def retrieve_timesteps( class StableDiffusionXLImg2ImgPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 43b397fb18a7..61b9f7ed2fbd 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -53,7 +53,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -310,7 +310,7 @@ def retrieve_timesteps( class StableDiffusionXLInpaintPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin, diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py index de11d8d8749f..b3327996263a 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py @@ -41,7 +41,7 @@ scale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .pipeline_output import StableDiffusionXLPipelineOutput @@ -119,7 +119,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, 
guidance_rescale=0.0): class StableDiffusionXLInstructPix2PixPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index a6575886594b..0b55bb38b5eb 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -163,7 +163,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps -class StableDiffusionAdapterPipeline(DiffusionPipeline, LatentDiffusionMixin): +class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter https://arxiv.org/abs/2302.08453 diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 16dd4180c2c2..96c7c6857c05 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -51,7 +51,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput @@ -181,7 +181,7 @@ def retrieve_timesteps( class StableDiffusionXLAdapterPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, IPAdapterMixin, diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 005984f8605a..0ed0765703f2 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -33,7 +33,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import TextToVideoSDPipelineOutput @@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: return outputs -class TextToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoSDPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-to-video generation. 
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 8ac6507ee717..40c486316e13 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -34,7 +34,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import TextToVideoSDPipelineOutput @@ -157,7 +157,7 @@ def preprocess_video(video): return video -class VideoToVideoSDPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class VideoToVideoSDPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for text-guided video-to-video generation. diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py index 76da107e8967..408ae23f4d9f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py @@ -17,7 +17,7 @@ from ...schedulers import KarrasDiffusionSchedulers from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from ..stable_diffusion import StableDiffusionSafetyChecker @@ -281,7 +281,7 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s return warped_latents -class TextToVideoZeroPipeline(DiffusionPipeline, LatentDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): +class TextToVideoZeroPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin): r""" Pipeline for zero-shot text-to-video generation using Stable Diffusion. 
diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py index c659202838b2..eaa2760363a9 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py @@ -37,7 +37,7 @@ unscale_lora_layers, ) from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin if is_invisible_watermark_available(): @@ -327,7 +327,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): class TextToVideoZeroSDXLPipeline( DiffusionPipeline, - LatentDiffusionMixin, + StableDiffusionMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin, ): diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index 2c09bb0fad5f..bacc1c40abcc 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -21,7 +21,7 @@ from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.outputs import BaseOutput from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, LatentDiffusionMixin +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from .modeling_text_decoder import UniDiffuserTextDecoder from .modeling_uvit import UniDiffuserModel @@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput): text: Optional[Union[List[str], List[List[str]]]] -class UniDiffuserPipeline(DiffusionPipeline, LatentDiffusionMixin): +class UniDiffuserPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned image generation, image-conditioned text generation, and joint image-text generation. 
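The hunks above only change each pipeline's base classes, so behavior is unchanged while the shared helpers become available everywhere. A minimal usage sketch of what a pipeline gains, assuming a diffusers build with this series applied; the checkpoint id and prompt below are illustrative and not part of the patch:

import torch
from diffusers import StableDiffusionPipeline

# Any pipeline that now lists StableDiffusionMixin among its bases exposes these helpers.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

pipe.enable_vae_slicing()   # decode the batch slice by slice to reduce peak memory
pipe.enable_vae_tiling()    # decode in tiles so large images fit in memory
pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  # FreeU skip/backbone scaling on the UNet
pipe.fuse_qkv_projections()  # experimental fused QKV attention projections

image = pipe("an astronaut riding a horse", num_inference_steps=25).images[0]

pipe.unfuse_qkv_projections()
pipe.disable_freeu()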
diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index a87b27d2c479..5a87f26fcc94 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -570,7 +570,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class LatentDiffusionMixin(metaclass=DummyObject): +class ImagePipelineOutput(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -585,7 +585,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class ImagePipelineOutput(metaclass=DummyObject): +class KarrasVePipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -600,7 +600,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class KarrasVePipeline(metaclass=DummyObject): +class LDMPipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -615,7 +615,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class LDMPipeline(metaclass=DummyObject): +class LDMSuperResolutionPipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -630,7 +630,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class LDMSuperResolutionPipeline(metaclass=DummyObject): +class PNDMPipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -645,7 +645,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class PNDMPipeline(metaclass=DummyObject): +class RePaintPipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -660,7 +660,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class RePaintPipeline(metaclass=DummyObject): +class ScoreSdeVePipeline(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): @@ -675,7 +675,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch"]) -class ScoreSdeVePipeline(metaclass=DummyObject): +class StableDiffusionMixin(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py index 89320eb2b936..7aef098916ca 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -53,10 +53,10 @@ TEXT_TO_IMAGE_PARAMS, ) from ..test_pipelines_common import ( - LDMFunctionTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, + SDFunctionTesterMixin, ) @@ -64,7 +64,7 @@ class StableDiffusion2PipelineFastTests( - LDMFunctionTesterMixin, + SDFunctionTesterMixin, PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py index b9327b9d3ce1..82eedac84ca3 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py @@ -50,9 +50,9 @@ TEXT_TO_IMAGE_PARAMS, ) from ..test_pipelines_common import ( - LDMFunctionTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, + SDFunctionTesterMixin, SDXLOptionalComponentsTesterMixin, ) @@ -61,7 +61,7 @@ class 
StableDiffusionXLPipelineFastTests(
-    LDMFunctionTesterMixin,
+    SDFunctionTesterMixin,
     PipelineLatentTesterMixin,
     PipelineTesterMixin,
     SDXLOptionalComponentsTesterMixin,
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index f14b4e98eae7..333137cc7861 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -29,7 +29,7 @@
     UNet2DConditionModel,
 )
 from diffusers.image_processor import VaeImageProcessor
-from diffusers.pipelines.pipeline_utils import LatentDiffusionMixin
+from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import logging
 from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
@@ -60,10 +60,10 @@ def check_same_shape(tensor_list):
     return all(shape == shapes[0] for shape in shapes[1:])
 
-class LDMFunctionTesterMixin:
+class SDFunctionTesterMixin:
     """
     This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes.
-    It provides a set of common tests for PyTorch pipeline that inherit from LatentDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
+    It provides a set of common tests for PyTorch pipelines that inherit from StableDiffusionMixin, e.g. vae_slicing, vae_tiling, freeu, etc.
     """
 
     def test_vae_slicing(self):
@@ -94,7 +94,8 @@ def test_vae_tiling(self):
         components = self.get_dummy_components()
         # make sure here that pndm scheduler skips prk
-        components["safety_checker"] = None
+        if "safety_checker" in components:
+            components["safety_checker"] = None
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(device)
         pipe.set_progress_bar_config(disable=None)
@@ -1155,13 +1156,14 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs):
 
     def test_LDM_component(self):
         """Any pipeline that have LDMFuncMixin should have vae and unet components."""
-        if not issubclass(self.pipeline_class, LatentDiffusionMixin):
+        if not issubclass(self.pipeline_class, StableDiffusionMixin):
             return
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         self.assertTrue(hasattr(pipe, "vae"))
         self.assertTrue(hasattr(pipe, "unet"))
 
+
 @is_staging_test
 class PipelinePushToHubTester(unittest.TestCase):
     identifier = uuid.uuid4()

From ebfd3a77faa811ce275896346073cd94fec55121 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Fri, 23 Feb 2024 16:05:59 +0800
Subject: [PATCH 09/17] Add more SDFunctionTesterMixin to cover different UNet types

---
 tests/pipelines/animatediff/test_animatediff.py   |  6 ++++--
 tests/pipelines/i2vgen_xl/test_i2vgenxl.py        |  4 ++--
 tests/pipelines/test_pipelines_common.py          | 10 ++++++++--
 .../text_to_video_synthesis/test_text_to_video.py |  4 ++--
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/tests/pipelines/animatediff/test_animatediff.py b/tests/pipelines/animatediff/test_animatediff.py
index 3b789e4ff0f3..288f856dc677 100644
--- a/tests/pipelines/animatediff/test_animatediff.py
+++ b/tests/pipelines/animatediff/test_animatediff.py
@@ -18,7 +18,7 @@
 from diffusers.utils.testing_utils import numpy_cosine_similarity_distance, require_torch_gpu, slow, torch_device
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
-from ..test_pipelines_common import IPAdapterTesterMixin, PipelineTesterMixin
+from ..test_pipelines_common import IPAdapterTesterMixin, PipelineTesterMixin, SDFunctionTesterMixin
 
 
 def to_np(tensor):
@@ -28,7 +28,9 @@ def
to_np(tensor): return tensor -class AnimateDiffPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, unittest.TestCase): +class AnimateDiffPipelineFastTests( + IPAdapterTesterMixin, SDFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase +): pipeline_class = AnimateDiffPipeline params = TEXT_TO_IMAGE_PARAMS batch_params = TEXT_TO_IMAGE_BATCH_PARAMS diff --git a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py index 004b06f160bd..aeda67174ad5 100644 --- a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py +++ b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py @@ -46,14 +46,14 @@ torch_device, ) -from ..test_pipelines_common import PipelineTesterMixin +from ..test_pipelines_common import PipelineTesterMixin, SDFunctionTesterMixin enable_full_determinism() @skip_mps -class I2VGenXLPipelineFastTests(PipelineTesterMixin, unittest.TestCase): +class I2VGenXLPipelineFastTests(SDFunctionTesterMixin, PipelineTesterMixin, unittest.TestCase): pipeline_class = I2VGenXLPipeline params = frozenset(["prompt", "negative_prompt", "image"]) batch_params = frozenset(["prompt", "negative_prompt", "image", "generator"]) diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 41d14dd6c7dd..6c0ff7d9a9fd 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -30,6 +30,9 @@ ) from diffusers.image_processor import VaeImageProcessor from diffusers.loaders import IPAdapterMixin +from diffusers.models.unets.unet_3d_condition import UNet3DConditionModel +from diffusers.models.unets.unet_i2vgen_xl import I2VGenXLUNet +from diffusers.models.unets.unet_motion_model import UNetMotionModel from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import logging @@ -1274,8 +1277,11 @@ def test_LDM_component(self): return components = self.get_dummy_components() pipe = self.pipeline_class(**components) - self.assertTrue(hasattr(pipe, "vae")) - self.assertTrue(hasattr(pipe, "unet")) + self.assertTrue(hasattr(pipe, "vae") and isinstance(self.pipe.vae, (AutoencoderKL, AutoencoderTiny))) + self.assertTrue( + hasattr(pipe, "unet") + and isinstance(pipe.unet, (UNet2DConditionModel, UNet3DConditionModel, I2VGenXLUNet, UNetMotionModel)) + ) @is_staging_test diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py index d988350505a8..9dc48011d2f1 100644 --- a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py +++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py @@ -37,14 +37,14 @@ ) from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS -from ..test_pipelines_common import PipelineTesterMixin +from ..test_pipelines_common import PipelineTesterMixin, SDFunctionTesterMixin enable_full_determinism() @skip_mps -class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase): +class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin, unittest.TestCase): pipeline_class = TextToVideoSDPipeline params = TEXT_TO_IMAGE_PARAMS batch_params = TEXT_TO_IMAGE_BATCH_PARAMS From 066c14d227d2cfcb1e92b6475f217c97456eee64 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Fri, 23 Feb 2024 16:06:17 +0800 Subject: [PATCH 10/17] add StableDiffusionMixin to InstaFlowPipeline --- examples/community/instaflow_one_step.py | 35 +++--------------------- 1 file 
changed, 4 insertions(+), 31 deletions(-) diff --git a/examples/community/instaflow_one_step.py b/examples/community/instaflow_one_step.py index 065abfe13d23..b07d85f8fcdf 100644 --- a/examples/community/instaflow_one_step.py +++ b/examples/community/instaflow_one_step.py @@ -24,7 +24,7 @@ from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.lora import adjust_lora_scale_text_encoder -from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import KarrasDiffusionSchedulers @@ -52,7 +52,9 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): return noise_cfg -class InstaFlowPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin): +class InstaFlowPipeline( + DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin +): r""" Pipeline for text-to-image generation using Rectified Flow and Euler discretization. This customized pipeline is based on StableDiffusionPipeline from the official Diffusers library (0.21.4) @@ -180,35 +182,6 @@ def __init__( self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.register_to_config(requires_safety_checker=requires_safety_checker) - def enable_vae_slicing(self): - r""" - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. - """ - self.vae.enable_slicing() - - def disable_vae_slicing(self): - r""" - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to - computing decoding in one step. - """ - self.vae.disable_slicing() - - def enable_vae_tiling(self): - r""" - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow - processing larger images. - """ - self.vae.enable_tiling() - - def disable_vae_tiling(self): - r""" - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to - computing decoding in one step. 
- """ - self.vae.disable_tiling() - def _encode_prompt( self, prompt, From fdc43c5475e2c495bf53b4491ff812ce72146fac Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Fri, 23 Feb 2024 16:06:46 +0800 Subject: [PATCH 11/17] remove StableDiffusionMixin from UniDiffuserPipeline --- .../unidiffuser/pipeline_unidiffuser.py | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index bacc1c40abcc..5d61b1054e1c 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -21,7 +21,7 @@ from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from ...utils.outputs import BaseOutput from ...utils.torch_utils import randn_tensor -from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin +from ..pipeline_utils import DiffusionPipeline from .modeling_text_decoder import UniDiffuserTextDecoder from .modeling_uvit import UniDiffuserModel @@ -48,7 +48,7 @@ class ImageTextPipelineOutput(BaseOutput): text: Optional[Union[List[str], List[List[str]]]] -class UniDiffuserPipeline(DiffusionPipeline, StableDiffusionMixin): +class UniDiffuserPipeline(DiffusionPipeline): r""" Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned image generation, image-conditioned text generation, and joint image-text generation. @@ -211,6 +211,39 @@ def _infer_mode(self, prompt, prompt_embeds, image, latents, prompt_latents, vae return mode + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_slicing + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. + """ + self.vae.enable_slicing() + + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_slicing + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_slicing() + + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.enable_vae_tiling + def enable_vae_tiling(self): + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + """ + self.vae.enable_tiling() + + # Copied from diffusers.pipelines.pipeline_utils.StableDiffusionMixin.disable_vae_tiling + def disable_vae_tiling(self): + r""" + Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to + computing decoding in one step. 
+        """
+        self.vae.disable_tiling()
+
     # Functions to manually set the mode
     def set_text_mode(self):
         r"""Manually set the generation mode to unconditional ("marginal") text generation."""

From 009914468207430b13a025a100702287ca31c594 Mon Sep 17 00:00:00 2001
From: YiYi Xu
Date: Fri, 23 Feb 2024 15:32:20 -1000
Subject: [PATCH 12/17] Update tests/pipelines/test_pipelines_common.py

---
 tests/pipelines/test_pipelines_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 6c0ff7d9a9fd..428cb0750088 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -1277,7 +1277,7 @@ def test_LDM_component(self):
             return
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
-        self.assertTrue(hasattr(pipe, "vae") and isinstance(self.pipe.vae, (AutoencoderKL, AutoencoderTiny)))
+        self.assertTrue(hasattr(pipe, "vae") and isinstance(pipe.vae, (AutoencoderKL, AutoencoderTiny)))
         self.assertTrue(
             hasattr(pipe, "unet")
             and isinstance(pipe.unet, (UNet2DConditionModel, UNet3DConditionModel, I2VGenXLUNet, UNetMotionModel))
         )

From 4a294ecf8556465f12b05d235be148f7eb7d7015 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Sun, 25 Feb 2024 12:03:06 +0800
Subject: [PATCH 13/17] make SDFunctionTesterMixin run on non-image diffusion pipelines

---
 tests/pipelines/test_pipelines_common.py | 33 +++++++++++++-----------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 428cb0750088..95ef904095ee 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -89,10 +89,10 @@ def test_vae_slicing(self):
         pipe.enable_vae_slicing()
         inputs = self.get_dummy_inputs(device)
         inputs["prompt"] = [inputs["prompt"]] * image_count
+        inputs["return_dict"] = False
         output_2 = pipe(**inputs)
-        # there is a small discrepancy at image borders vs.
full batch decode - assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 3e-3 + assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 3e-3 def test_vae_tiling(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -109,14 +109,14 @@ def test_vae_tiling(self): # Test that tiled decode at 512x512 yields the same result as the non-tiled decode generator = torch.Generator(device=device).manual_seed(0) - output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False) # make sure tiled vae decode yields the same result pipe.enable_vae_tiling() generator = torch.Generator(device=device).manual_seed(0) - output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np") + output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False) - assert np.abs(output_2.images.flatten() - output_1.images.flatten()).max() < 5e-1 + assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 5e-1 # test that tiled decode works with various shapes shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)] @@ -131,10 +131,10 @@ def test_freeu_enabled(self): pipe.set_progress_bar_config(disable=None) prompt = "hey" - output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) - output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] assert not np.allclose( output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1] @@ -147,7 +147,7 @@ def test_freeu_disabled(self): pipe.set_progress_bar_config(disable=None) prompt = "hey" - output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images + output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) pipe.disable_freeu() @@ -157,7 +157,7 @@ def test_freeu_disabled(self): for key in freeu_keys: assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None." 
-        output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0)).images
+        output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0]
 
         assert np.allclose(
             output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1]
@@ -171,18 +171,21 @@ def test_fused_qkv_projections(self):
         pipe.set_progress_bar_config(disable=None)
 
         inputs = self.get_dummy_inputs(device)
-        image = pipe(**inputs).images
+        inputs["return_dict"] = False
+        image = pipe(**inputs)[0]
         original_image_slice = image[0, -3:, -3:, -1]
 
         pipe.fuse_qkv_projections()
         inputs = self.get_dummy_inputs(device)
-        image = pipe(**inputs).images
-        image_slice_fused = image[0, -3:, -3:, -1]
+        inputs["return_dict"] = False
+        image_fused = pipe(**inputs)[0]
+        image_slice_fused = image_fused[0, -3:, -3:, -1]
 
         pipe.unfuse_qkv_projections()
         inputs = self.get_dummy_inputs(device)
-        image = pipe(**inputs).images
-        image_slice_disabled = image[0, -3:, -3:, -1]
+        inputs["return_dict"] = False
+        image_disabled = pipe(**inputs)[0]
+        image_slice_disabled = image_disabled[0, -3:, -3:, -1]
 
         assert np.allclose(
             original_image_slice, image_slice_fused, atol=1e-2, rtol=1e-2
@@ -1271,7 +1274,7 @@ def callback_increase_guidance(pipe, i, t, callback_kwargs):
         # accounts for models that modify the number of inference steps based on strength
         assert pipe.guidance_scale == (inputs["guidance_scale"] + pipe.num_timesteps)
 
-    def test_LDM_component(self):
+    def test_StableDiffusionMixin_component(self):
         """Any pipeline that have LDMFuncMixin should have vae and unet components."""
         if not issubclass(self.pipeline_class, StableDiffusionMixin):
             return

From b3c3de04feac417c6011eb7424422e5906813373 Mon Sep 17 00:00:00 2001
From: ultranity <1095429904@qq.com>
Date: Sun, 25 Feb 2024 12:03:18 +0800
Subject: [PATCH 14/17] fix fuse_projections by checking is_cross_attention at init

---
 src/diffusers/models/attention_processor.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index 1c008264ba33..3d973608934a 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -116,6 +116,8 @@ def __init__(
         super().__init__()
         self.inner_dim = out_dim if out_dim is not None else dim_head * heads
         self.query_dim = query_dim
+        self.use_bias = bias
+        self.is_cross_attention = cross_attention_dim is not None
         self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim
         self.upcast_attention = upcast_attention
         self.upcast_softmax = upcast_softmax
@@ -693,27 +695,32 @@ def norm_encoder_hidden_states(self, encoder_hidden_states: torch.Tensor) -> tor
 
     @torch.no_grad()
     def fuse_projections(self, fuse=True):
-        is_cross_attention = self.cross_attention_dim != self.query_dim
         device = self.to_q.weight.data.device
         dtype = self.to_q.weight.data.dtype
 
-        if not is_cross_attention:
+        if not self.is_cross_attention:
             # fetch weight matrices.
             concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data])
             in_features = concatenated_weights.shape[1]
             out_features = concatenated_weights.shape[0]
 
             # create a new single projection layer and copy over the weights.
- self.to_qkv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype) + self.to_qkv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) self.to_qkv.weight.copy_(concatenated_weights) + if self.use_bias: + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + self.to_qkv.bias.copy_(concatenated_bias) else: concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) in_features = concatenated_weights.shape[1] out_features = concatenated_weights.shape[0] - self.to_kv = self.linear_cls(in_features, out_features, bias=False, device=device, dtype=dtype) + self.to_kv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) self.to_kv.weight.copy_(concatenated_weights) + if self.use_bias: + concatenated_bias = torch.cat([ self.to_k.bias.data, self.to_v.bias.data]) + self.to_kv.bias.copy_(concatenated_bias) self.fused_projections = fuse From 994299c3113b32b9c8d4199afe511dafd8b7a685 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Tue, 27 Feb 2024 20:52:09 +0800 Subject: [PATCH 15/17] use get_dummy_inputs for test_vae_tiling and test_freeu --- src/diffusers/models/attention_processor.py | 2 +- tests/pipelines/test_pipelines_common.py | 32 ++++++++++++--------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py index 9d13b0aec945..5ec8876fc114 100644 --- a/src/diffusers/models/attention_processor.py +++ b/src/diffusers/models/attention_processor.py @@ -723,7 +723,7 @@ def fuse_projections(self, fuse=True): self.to_kv = self.linear_cls(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) self.to_kv.weight.copy_(concatenated_weights) if self.use_bias: - concatenated_bias = torch.cat([ self.to_k.bias.data, self.to_v.bias.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) self.to_kv.bias.copy_(concatenated_bias) self.fused_projections = fuse diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 95ef904095ee..0e3d20baeb8a 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -105,18 +105,19 @@ def test_vae_tiling(self): pipe = pipe.to(device) pipe.set_progress_bar_config(disable=None) - prompt = "A painting of a squirrel eating a burger" + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False # Test that tiled decode at 512x512 yields the same result as the non-tiled decode - generator = torch.Generator(device=device).manual_seed(0) - output_1 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False) + output_1 = pipe(**inputs)[0] # make sure tiled vae decode yields the same result pipe.enable_vae_tiling() - generator = torch.Generator(device=device).manual_seed(0) - output_2 = pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np", return_dict=False) + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False + output_2 = pipe(**inputs)[0] - assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 5e-1 + assert np.abs(output_2.flatten() - output_1.flatten()).max() < 5e-1 # test that tiled decode works with various shapes shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)] @@ -130,11 +131,14 @@ 
def test_freeu_enabled(self): pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - prompt = "hey" - output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False + output = pipe(**inputs)[0] pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) - output_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False + output_freeu = pipe(**inputs)[0] assert not np.allclose( output[0, -3:, -3:, -1], output_freeu[0, -3:, -3:, -1] @@ -146,8 +150,9 @@ def test_freeu_disabled(self): pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - prompt = "hey" - output = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False + output = pipe(**inputs)[0] pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4) pipe.disable_freeu() @@ -157,8 +162,9 @@ def test_freeu_disabled(self): for key in freeu_keys: assert getattr(upsample_block, key) is None, f"Disabling of FreeU should have set {key} to None." - output_no_freeu = pipe(prompt, num_inference_steps=1, output_type="np", generator=torch.manual_seed(0), return_dict=False)[0] - + inputs = self.get_dummy_inputs(torch_device) + inputs["return_dict"] = False + output_no_freeu = pipe(**inputs)[0] assert np.allclose( output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1] ), "Disabling of FreeU should lead to results similar to the default pipeline results." From a076831153a3f28cc9adb1e198cbadf29b72b867 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Tue, 27 Feb 2024 20:52:31 +0800 Subject: [PATCH 16/17] fix I2V gen test error --- tests/pipelines/test_pipelines_common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 0e3d20baeb8a..0ca464d3bd13 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -83,16 +83,20 @@ def test_vae_slicing(self): inputs = self.get_dummy_inputs(device) inputs["prompt"] = [inputs["prompt"]] * image_count + if "image" in inputs: # fix batch size mismatch in I2V_Gen pipeline + inputs["image"] = [inputs["image"]] * image_count output_1 = pipe(**inputs) # make sure sliced vae decode yields the same result pipe.enable_vae_slicing() inputs = self.get_dummy_inputs(device) inputs["prompt"] = [inputs["prompt"]] * image_count + if "image" in inputs: + inputs["image"] = [inputs["image"]] * image_count inputs["return_dict"] = False output_2 = pipe(**inputs) - assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 3e-3 + assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 1e-2 def test_vae_tiling(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -117,7 +121,7 @@ def test_vae_tiling(self): inputs["return_dict"] = False output_2 = pipe(**inputs)[0] - assert np.abs(output_2.flatten() - output_1.flatten()).max() < 5e-1 + assert np.abs(output_2 - output_1).max() < 5e-1 # test that tiled decode works with various shapes shapes = [(1, 4, 73, 97), (1, 4, 97, 73), (1, 4, 49, 65), (1, 4, 65, 49)] @@ -166,8 +170,8 @@ def test_freeu_disabled(self): inputs["return_dict"] = False 
output_no_freeu = pipe(**inputs)[0] assert np.allclose( - output[0, -3:, -3:, -1], output_no_freeu[0, -3:, -3:, -1] - ), "Disabling of FreeU should lead to results similar to the default pipeline results." + output, output_no_freeu, atol=1e-2 + ), f"Disabling of FreeU should lead to results similar to the default pipeline results but Max Abs Error={np.abs(output_no_freeu - output).max()}." def test_fused_qkv_projections(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator From 0fd684bc391bffe5790f291cc15bad2f5479e986 Mon Sep 17 00:00:00 2001 From: ultranity <1095429904@qq.com> Date: Wed, 28 Feb 2024 12:36:00 +0800 Subject: [PATCH 17/17] add missing StableDiffusionMixin --- .../community/clip_guided_images_mixing_stable_diffusion.py | 4 ++-- examples/community/clip_guided_stable_diffusion.py | 4 ++-- examples/community/clip_guided_stable_diffusion_img2img.py | 4 ++-- examples/community/imagic_stable_diffusion.py | 3 ++- examples/community/interpolate_stable_diffusion.py | 4 ++-- examples/community/mixture_canvas.py | 4 ++-- examples/community/multilingual_stable_diffusion.py | 4 ++-- examples/community/pipeline_sdxl_style_aligned.py | 2 +- examples/community/stable_diffusion_controlnet_img2img.py | 5 +++-- examples/community/stable_diffusion_controlnet_inpaint.py | 5 +++-- .../community/stable_diffusion_controlnet_inpaint_img2img.py | 5 +++-- examples/community/wildcard_stable_diffusion.py | 4 ++-- .../pipeline_stable_diffusion_pix2pix_zero.py | 4 ++-- 13 files changed, 28 insertions(+), 24 deletions(-) diff --git a/examples/community/clip_guided_images_mixing_stable_diffusion.py b/examples/community/clip_guided_images_mixing_stable_diffusion.py index 6fcbb16963b8..16dcecd7b22a 100644 --- a/examples/community/clip_guided_images_mixing_stable_diffusion.py +++ b/examples/community/clip_guided_images_mixing_stable_diffusion.py @@ -12,12 +12,12 @@ from diffusers import ( AutoencoderKL, DDIMScheduler, - DiffusionPipeline, DPMSolverMultistepScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, ) +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import PIL_INTERPOLATION from diffusers.utils.torch_utils import randn_tensor @@ -77,7 +77,7 @@ def set_requires_grad(model, value): param.requires_grad = value -class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline): +class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline, StableDiffusionMixin): def __init__( self, vae: AutoencoderKL, diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py index 9065462940c2..4205718802de 100644 --- a/examples/community/clip_guided_stable_diffusion.py +++ b/examples/community/clip_guided_stable_diffusion.py @@ -10,12 +10,12 @@ from diffusers import ( AutoencoderKL, DDIMScheduler, - DiffusionPipeline, DPMSolverMultistepScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, ) +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput @@ -51,7 +51,7 @@ def set_requires_grad(model, value): param.requires_grad = value -class CLIPGuidedStableDiffusion(DiffusionPipeline): +class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin): """CLIP guided stable diffusion based on the amazing 
repo by @crowsonkb and @Jack000 - https://github.com/Jack000/glid-3-xl - https://github.dev/crowsonkb/k-diffusion diff --git a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py index 83e117f02dd2..434d5253679a 100644 --- a/examples/community/clip_guided_stable_diffusion_img2img.py +++ b/examples/community/clip_guided_stable_diffusion_img2img.py @@ -12,12 +12,12 @@ from diffusers import ( AutoencoderKL, DDIMScheduler, - DiffusionPipeline, DPMSolverMultistepScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel, ) +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import PIL_INTERPOLATION, deprecate from diffusers.utils.torch_utils import randn_tensor @@ -125,7 +125,7 @@ def set_requires_grad(model, value): param.requires_grad = value -class CLIPGuidedStableDiffusion(DiffusionPipeline): +class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin): """CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000 - https://github.com/Jack000/glid-3-xl - https://github.dev/crowsonkb/k-diffusion diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py index 057d46c4522b..25048e946fe0 100644 --- a/examples/community/imagic_stable_diffusion.py +++ b/examples/community/imagic_stable_diffusion.py @@ -19,6 +19,7 @@ from diffusers import DiffusionPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -56,7 +57,7 @@ def preprocess(image): return 2.0 * image - 1.0 -class ImagicStableDiffusionPipeline(DiffusionPipeline): +class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for imagic image editing. See paper here: https://arxiv.org/pdf/2210.09276.pdf diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py index 4c13e0046b9a..1b859c35f174 100644 --- a/examples/community/interpolate_stable_diffusion.py +++ b/examples/community/interpolate_stable_diffusion.py @@ -7,9 +7,9 @@ import torch from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer -from diffusers import DiffusionPipeline from diffusers.configuration_utils import FrozenDict from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler @@ -46,7 +46,7 @@ def slerp(t, v0, v1, DOT_THRESHOLD=0.9995): return v2 -class StableDiffusionWalkPipeline(DiffusionPipeline): +class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin): r""" Pipeline for text-to-image generation using Stable Diffusion. 
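Each file in this commit follows the same pattern seen above: import StableDiffusionMixin from pipeline_utils and add it to the class bases, so the community pipeline picks up the shared vae/unet helpers instead of carrying its own copies. A reduced sketch of that pattern, with a placeholder class name and component list rather than any real pipeline from the patch:

from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin

class MyCommunityPipeline(DiffusionPipeline, StableDiffusionMixin):
    # The mixin assumes `self.vae` and `self.unet` exist, so register them as modules.
    def __init__(self, vae, text_encoder, tokenizer, unet, scheduler):
        super().__init__()
        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
        )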
diff --git a/examples/community/mixture_canvas.py b/examples/community/mixture_canvas.py
index 3737183e5513..2083c7acad38 100644
--- a/examples/community/mixture_canvas.py
+++ b/examples/community/mixture_canvas.py
@@ -12,7 +12,7 @@
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -264,7 +264,7 @@ def _quartic_weights(self, region: DiffusionRegion) -> torch.tensor:
     return torch.tile(torch.tensor(weights), (self.nbatch, self.latent_space_dim, 1, 1))
 
 
-class StableDiffusionCanvasPipeline(DiffusionPipeline):
+class StableDiffusionCanvasPipeline(DiffusionPipeline, StableDiffusionMixin):
     """Stable Diffusion pipeline that mixes several diffusers in the same canvas"""
 
     def __init__(
diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py
index 0a3b49a14d7d..f3b0540cf4d3 100644
--- a/examples/community/multilingual_stable_diffusion.py
+++ b/examples/community/multilingual_stable_diffusion.py
@@ -11,9 +11,9 @@
     pipeline,
 )
 
-from diffusers import DiffusionPipeline
 from diffusers.configuration_utils import FrozenDict
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -48,7 +48,7 @@ def translate_prompt(prompt, translation_tokenizer, translation_model, device):
     return en_trans[0]
 
 
-class MultilingualStableDiffusion(DiffusionPipeline):
+class MultilingualStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion in different languages.
diff --git a/examples/community/pipeline_sdxl_style_aligned.py b/examples/community/pipeline_sdxl_style_aligned.py
index db19533a3d2c..ec4aa3791557 100644
--- a/examples/community/pipeline_sdxl_style_aligned.py
+++ b/examples/community/pipeline_sdxl_style_aligned.py
@@ -85,7 +85,7 @@
         >>> from typing import List
 
         >>> import torch
-        >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline,StableDiffusionMixin
+        >>> from diffusers.pipelines.pipeline_utils import DiffusionPipeline
        >>> from PIL import Image
 
         >>> model_id = "a-r-r-o-w/dreamshaper-xl-turbo"
diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py
index f961c767e416..5f9083616a84 100644
--- a/examples/community/stable_diffusion_controlnet_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_img2img.py
@@ -8,8 +8,9 @@
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
 from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import (
@@ -128,7 +129,7 @@ def prepare_controlnet_conditioning_image(
     return controlnet_conditioning_image
 
 
-class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
+class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
     """
     Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
     """
diff --git a/examples/community/stable_diffusion_controlnet_inpaint.py b/examples/community/stable_diffusion_controlnet_inpaint.py
index 76e6e331abcb..0173ed41bee6 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint.py
@@ -9,8 +9,9 @@
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
 from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import (
@@ -226,7 +227,7 @@ def prepare_controlnet_conditioning_image(
     return controlnet_conditioning_image
 
 
-class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline):
+class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusionMixin):
     """
     Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
     """
diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
index 34b8170f66c8..d056eb112165 100644
--- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
+++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py
@@ -9,7 +9,8 @@
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from diffusers import AutoencoderKL, ControlNetModel, DiffusionPipeline, UNet2DConditionModel, logging
+from diffusers import AutoencoderKL, ControlNetModel, UNet2DConditionModel, logging
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import (
@@ -215,7 +216,7 @@ def prepare_controlnet_conditioning_image(
     return controlnet_conditioning_image
 
 
-class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline):
+class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
     """
     Inspired by: https://github.com/haofanwang/ControlNet-for-Diffusers/
     """
diff --git a/examples/community/wildcard_stable_diffusion.py b/examples/community/wildcard_stable_diffusion.py
index 1a5ea350b857..241e661536d3 100644
--- a/examples/community/wildcard_stable_diffusion.py
+++ b/examples/community/wildcard_stable_diffusion.py
@@ -8,9 +8,9 @@
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
-from diffusers import DiffusionPipeline
 from diffusers.configuration_utils import FrozenDict
 from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
@@ -63,7 +63,7 @@ class WildcardStableDiffusionOutput(StableDiffusionPipelineOutput):
     prompts: List[str]
 
 
-class WildcardStableDiffusionPipeline(DiffusionPipeline):
+class WildcardStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Example Usage:
         pipe = WildcardStableDiffusionPipeline.from_pretrained(
diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
index c2e2369f27f8..c819e5728181 100644
--- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
+++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
@@ -46,7 +46,7 @@
     unscale_lora_layers,
 )
 from ....utils.torch_utils import randn_tensor
-from ...pipeline_utils import DiffusionPipeline
+from ...pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ...stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
 from ...stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
@@ -280,7 +280,7 @@ def __call__(
         return hidden_states
 
 
-class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
+class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for pixel-level image editing using Pix2Pix Zero. Based on Stable Diffusion.
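A quick smoke test for this patch, sketched under two assumptions: the community
pipelines above are loadable through the standard `custom_pipeline` mechanism, and
the mixin keeps the method set it had when introduced. The model id and the FreeU
values are illustrative, not mandated by this patch.

    import torch

    from diffusers import DiffusionPipeline
    from diffusers.pipelines.pipeline_utils import StableDiffusionMixin

    # Load one of the community pipelines touched by this patch and confirm the
    # helpers inherited from StableDiffusionMixin are now available on it.
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        custom_pipeline="wildcard_stable_diffusion",
        torch_dtype=torch.float16,
    )

    assert isinstance(pipe, StableDiffusionMixin)
    pipe.enable_vae_slicing()  # sliced VAE decoding, inherited from the mixin
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)  # values suggested upstream for SD v1.x
    pipe.disable_freeu()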